Update comments and correct name of memmove substitute function.
This commit is contained in:
parent
41ebeb4290
commit
36f06405b7
|
@ -2,7 +2,7 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
|
@ -177,7 +177,7 @@ by "configure". */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
|
||||||
option on the command line. */
|
option on the command line. */
|
||||||
|
|
||||||
#ifdef VPCOMPAT
|
#ifdef VPCOMPAT
|
||||||
|
@ -200,7 +200,7 @@ neither (there are some non-Unix environments where this is the case). */
|
||||||
#define memmove(a, b, c) bcopy(b, a, c)
|
#define memmove(a, b, c) bcopy(b, a, c)
|
||||||
#else /* HAVE_BCOPY */
|
#else /* HAVE_BCOPY */
|
||||||
static void *
|
static void *
|
||||||
pcre_memmove(void *d, const void *s, size_t n)
|
pcre2_memmove(void *d, const void *s, size_t n)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
unsigned char *dest = (unsigned char *)d;
|
unsigned char *dest = (unsigned char *)d;
|
||||||
|
@ -218,7 +218,7 @@ else
|
||||||
return (void *)(dest - n);
|
return (void *)(dest - n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
#define memmove(a, b, c) pcre2_memmove(a, b, c)
|
||||||
#endif /* not HAVE_BCOPY */
|
#endif /* not HAVE_BCOPY */
|
||||||
#endif /* not HAVE_MEMMOVE */
|
#endif /* not HAVE_MEMMOVE */
|
||||||
#endif /* not VPCOMPAT */
|
#endif /* not VPCOMPAT */
|
||||||
|
@ -397,7 +397,7 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||||
space. However, in many other sources it is listed as a space and has been in
|
space. However, in many other sources it is listed as a space and has been in
|
||||||
PCRE for a long time. */
|
PCRE (both APIs) for a long time. */
|
||||||
|
|
||||||
#define HSPACE_LIST \
|
#define HSPACE_LIST \
|
||||||
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
|
||||||
|
@ -565,17 +565,9 @@ req_unit match. */
|
||||||
|
|
||||||
#define REQ_CU_MAX 1000
|
#define REQ_CU_MAX 1000
|
||||||
|
|
||||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
/* Offsets for the bitmap tables in the cbits set of tables. Each table
|
||||||
|
contains a set of bits for a class map. Some classes are built by combining
|
||||||
#define ctype_space 0x01
|
these tables. */
|
||||||
#define ctype_letter 0x02
|
|
||||||
#define ctype_digit 0x04
|
|
||||||
#define ctype_xdigit 0x08
|
|
||||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
|
||||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
|
||||||
|
|
||||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
|
||||||
of bits for a class map. Some classes are built by combining these tables. */
|
|
||||||
|
|
||||||
#define cbit_space 0 /* [:space:] or \s */
|
#define cbit_space 0 /* [:space:] or \s */
|
||||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||||
|
@ -589,19 +581,28 @@ of bits for a class map. Some classes are built by combining these tables. */
|
||||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||||
#define cbit_length 320 /* Length of the cbits table */
|
#define cbit_length 320 /* Length of the cbits table */
|
||||||
|
|
||||||
/* Offsets of the various tables from the base tables pointer, and
|
/* Bit definitions for entries in the ctypes table. */
|
||||||
total length. */
|
|
||||||
|
|
||||||
#define lcc_offset 0
|
#define ctype_space 0x01
|
||||||
#define fcc_offset 256
|
#define ctype_letter 0x02
|
||||||
#define cbits_offset 512
|
#define ctype_digit 0x04
|
||||||
#define ctypes_offset (cbits_offset + cbit_length)
|
#define ctype_xdigit 0x08
|
||||||
|
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||||
|
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||||
|
|
||||||
|
/* Offsets of the various tables from the base tables pointer, and
|
||||||
|
total length of the tables. */
|
||||||
|
|
||||||
|
#define lcc_offset 0 /* Lower case */
|
||||||
|
#define fcc_offset 256 /* Flip case */
|
||||||
|
#define cbits_offset 512 /* Character classes */
|
||||||
|
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||||
#define tables_length (ctypes_offset + 256)
|
#define tables_length (ctypes_offset + 256)
|
||||||
|
|
||||||
|
|
||||||
/* -------------------- Character and string names ------------------------ */
|
/* -------------------- Character and string names ------------------------ */
|
||||||
|
|
||||||
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||||
character constants like '*' because the compiler would emit their EBCDIC code,
|
character constants like '*' because the compiler would emit their EBCDIC code,
|
||||||
which is different from their ASCII/UTF-8 code. Instead we define macros for
|
which is different from their ASCII/UTF-8 code. Instead we define macros for
|
||||||
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
|
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
|
||||||
|
@ -609,7 +610,7 @@ is enabled. When UTF-8 support is not enabled, the definitions use character
|
||||||
literals. Both character and string versions of each character are needed, and
|
literals. Both character and string versions of each character are needed, and
|
||||||
there are some longer strings as well.
|
there are some longer strings as well.
|
||||||
|
|
||||||
This means that, on EBCDIC platforms, the PCRE library can handle either
|
This means that, on EBCDIC platforms, the PCRE2 library can handle either
|
||||||
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
|
||||||
would need different lookups depending on whether PCRE2_UTF was set or not.
|
would need different lookups depending on whether PCRE2_UTF was set or not.
|
||||||
This would make it impossible to use characters in switch/case statements,
|
This would make it impossible to use characters in switch/case statements,
|
||||||
|
@ -621,7 +622,7 @@ macros to give the functions distinct names. */
|
||||||
#ifndef SUPPORT_UNICODE
|
#ifndef SUPPORT_UNICODE
|
||||||
|
|
||||||
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
/* UTF-8 support is not enabled; use the platform-dependent character literals
|
||||||
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
|
so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
|
||||||
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
|
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
|
||||||
characters, a common practice has been to use its NL (0x15) character as the
|
characters, a common practice has been to use its NL (0x15) character as the
|
||||||
line terminator in C-like processing environments. However, sometimes the LF
|
line terminator in C-like processing environments. However, sometimes the LF
|
||||||
|
@ -629,7 +630,7 @@ line terminator in C-like processing environments. However, sometimes the LF
|
||||||
|
|
||||||
http://unicode.org/standard/reports/tr13/tr13-5.html
|
http://unicode.org/standard/reports/tr13/tr13-5.html
|
||||||
|
|
||||||
PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
|
PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
|
||||||
instead. Whichever is *not* chosen is defined as NEL.
|
instead. Whichever is *not* chosen is defined as NEL.
|
||||||
|
|
||||||
In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
|
In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
|
||||||
|
@ -1234,7 +1235,7 @@ only. */
|
||||||
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
|
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
|
||||||
|
|
||||||
/* The following special properties are used only in XCLASS items, when POSIX
|
/* The following special properties are used only in XCLASS items, when POSIX
|
||||||
classes are specified and PCRE_UCP is set - in other words, for Unicode
|
classes are specified and PCRE2_UCP is set - in other words, for Unicode
|
||||||
handling of these classes. They are not available via the \p or \P escapes like
|
handling of these classes. They are not available via the \p or \P escapes like
|
||||||
those in the above list, and so they do not take part in the autopossessifying
|
those in the above list, and so they do not take part in the autopossessifying
|
||||||
table. */
|
table. */
|
||||||
|
@ -1298,7 +1299,7 @@ compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
|
||||||
like \N.
|
like \N.
|
||||||
|
|
||||||
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
|
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
|
||||||
when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
|
when PCRE2_UCP is set and replacement of \d etc by \p sequences is required.
|
||||||
They must be contiguous, and remain in order so that the replacements can be
|
They must be contiguous, and remain in order so that the replacements can be
|
||||||
looked up from a table.
|
looked up from a table.
|
||||||
|
|
||||||
|
@ -1323,12 +1324,12 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||||
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||||
order to the list of escapes immediately above. Furthermore, values up to
|
order to the list of escapes immediately above. Furthermore, values up to
|
||||||
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||||
pcre_compile.c
|
pcre2_auto_possess.c.
|
||||||
|
|
||||||
Whenever this list is updated, the two macro definitions that follow must be
|
Whenever this list is updated, the two macro definitions that follow must be
|
||||||
updated to match. The possessification table called "opcode_possessify" in
|
updated to match. The possessification table called "opcode_possessify" in
|
||||||
pcre_compile.c must also be updated, and also the tables called "coptable"
|
pcre2_compile.c must also be updated, and also the tables called "coptable"
|
||||||
and "poptable" in pcre_dfa_exec.c.
|
and "poptable" in pcre2_dfa_match.c.
|
||||||
|
|
||||||
****** NOTE NOTE NOTE ******/
|
****** NOTE NOTE NOTE ******/
|
||||||
|
|
||||||
|
@ -1905,7 +1906,7 @@ private structures. */
|
||||||
|
|
||||||
/* Private "external" functions. These are internal functions that are called
|
/* Private "external" functions. These are internal functions that are called
|
||||||
from modules other than the one in which they are defined. They have to be
|
from modules other than the one in which they are defined. They have to be
|
||||||
"external" in the C sense, but are not part of the PCRE public API. They are
|
"external" in the C sense, but are not part of the PCRE2 public API. They are
|
||||||
not referenced from pcre2test, and must not be defined when no code unit width
|
not referenced from pcre2test, and must not be defined when no code unit width
|
||||||
is available. */
|
is available. */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue