Add -LP and -LS (list properties, list scripts) features to pcre2test.
This commit is contained in:
parent
68fbc1982e
commit
bf35c0518c
|
@ -39,7 +39,8 @@ pcre2_substitute(), and the replacement argument of the latter, if the pointer
|
||||||
is NULL and the length is zero, treat as an empty string. Apparently a number
|
is NULL and the length is zero, treat as an empty string. Apparently a number
|
||||||
of applications treat NULL/0 in this way.
|
of applications treat NULL/0 in this way.
|
||||||
|
|
||||||
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
|
14. Added support for Bidi_Class and a number of binary Unicode properties,
|
||||||
|
including Bidi_Control.
|
||||||
|
|
||||||
15. Fix some minor issues raised by clang sanitize.
|
15. Fix some minor issues raised by clang sanitize.
|
||||||
|
|
||||||
|
@ -76,6 +77,8 @@ misaligned the frame that follows, resulting in an alignment fault when storing
|
||||||
a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
|
a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
|
||||||
Clarke PR#72.
|
Clarke PR#72.
|
||||||
|
|
||||||
|
20. Added -LP and -LS listing options to pcre2test.
|
||||||
|
|
||||||
|
|
||||||
Version 10.39 29-October-2021
|
Version 10.39 29-October-2021
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "28 November 2021" "PCRE 10.40"
|
.TH PCRE2TEST 1 "12 January 2022" "PCRE 10.40"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified.
|
||||||
\fB-LM\fP
|
\fB-LM\fP
|
||||||
List modifiers: write a list of available pattern and subject modifiers to the
|
List modifiers: write a list of available pattern and subject modifiers to the
|
||||||
standard output, then exit with zero exit code. All other options are ignored.
|
standard output, then exit with zero exit code. All other options are ignored.
|
||||||
If both -C and -LM are present, whichever is first is recognized.
|
If both -C and any -Lx options are present, whichever is first is recognized.
|
||||||
|
.TP 10
|
||||||
|
\fB-LP\fP
|
||||||
|
List properties: write a list of recognized Unicode properties to the standard
|
||||||
|
output, then exit with zero exit code. All other options are ignored. If both
|
||||||
|
-C and any -Lx options are present, whichever is first is recognized.
|
||||||
|
.TP 10
|
||||||
|
\fB-LS\fP
|
||||||
|
List scripts: write a list of recognized Unicode script names to the standard
|
||||||
|
output, then exit with zero exit code. All other options are ignored. If both
|
||||||
|
-C and any -Lx options are present, whichever is first is recognized.
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB-pattern\fP \fImodifier-list\fP
|
\fB-pattern\fP \fImodifier-list\fP
|
||||||
Behave as if each pattern line contains the given modifiers.
|
Behave as if each pattern line contains the given modifiers.
|
||||||
|
@ -2109,6 +2119,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 28 November 2021
|
Last updated: 12 January 2022
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
214
src/pcre2test.c
214
src/pcre2test.c
|
@ -5490,7 +5490,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
|
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
|
||||||
{
|
{
|
||||||
show_compile_options(
|
show_compile_options(
|
||||||
pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
|
pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
|
||||||
msg, "");
|
msg, "");
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
@ -5499,7 +5499,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
|
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
|
||||||
{
|
{
|
||||||
show_compile_extra_options(
|
show_compile_extra_options(
|
||||||
FLD(pat_context, extra_options) &
|
FLD(pat_context, extra_options) &
|
||||||
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
|
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
@ -5509,7 +5509,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
{
|
{
|
||||||
show_controls(
|
show_controls(
|
||||||
pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
|
pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
|
||||||
pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
|
pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
|
||||||
msg);
|
msg);
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
@ -7310,7 +7310,7 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
uint8_t *pr;
|
uint8_t *pr;
|
||||||
uint8_t rbuffer[REPLACE_BUFFSIZE];
|
uint8_t rbuffer[REPLACE_BUFFSIZE];
|
||||||
uint8_t nbuffer[REPLACE_BUFFSIZE];
|
uint8_t nbuffer[REPLACE_BUFFSIZE];
|
||||||
uint8_t *rbptr;
|
uint8_t *rbptr;
|
||||||
uint32_t xoptions;
|
uint32_t xoptions;
|
||||||
uint32_t emoption; /* External match option */
|
uint32_t emoption; /* External match option */
|
||||||
PCRE2_SIZE j, rlen, nsize, erroroffset;
|
PCRE2_SIZE j, rlen, nsize, erroroffset;
|
||||||
|
@ -7460,10 +7460,10 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
{
|
{
|
||||||
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There is a special option to set the replacement to NULL in order to test
|
/* There is a special option to set the replacement to NULL in order to test
|
||||||
that case. */
|
that case. */
|
||||||
|
|
||||||
rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
|
rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
|
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
|
||||||
|
@ -7655,15 +7655,15 @@ for (gmatched = 0;; gmatched++)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The result of the match is now in capcount. First handle a successful
|
/* The result of the match is now in capcount. First handle a successful
|
||||||
match. If pp was forced to be NULL (to test NULL handling) it will have been
|
match. If pp was forced to be NULL (to test NULL handling) it will have been
|
||||||
treated as an empty string if the length was zero. So re-create that for
|
treated as an empty string if the length was zero. So re-create that for
|
||||||
outputting. */
|
outputting. */
|
||||||
|
|
||||||
if (capcount >= 0)
|
if (capcount >= 0)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (pp == NULL) pp = (uint8_t *)"";
|
if (pp == NULL) pp = (uint8_t *)"";
|
||||||
|
|
||||||
if (capcount > (int)oveccount) /* Check for lunatic return value */
|
if (capcount > (int)oveccount) /* Check for lunatic return value */
|
||||||
{
|
{
|
||||||
|
@ -8251,6 +8251,8 @@ printf(" -jit set default pattern modifier 'jit'\n");
|
||||||
printf(" -jitfast set default pattern modifier 'jitfast'\n");
|
printf(" -jitfast set default pattern modifier 'jitfast'\n");
|
||||||
printf(" -jitverify set default pattern modifier 'jitverify'\n");
|
printf(" -jitverify set default pattern modifier 'jitverify'\n");
|
||||||
printf(" -LM list pattern and subject modifiers, then exit\n");
|
printf(" -LM list pattern and subject modifiers, then exit\n");
|
||||||
|
printf(" -LP list non-script properties, then exit\n");
|
||||||
|
printf(" -LS list supported scripts, then exit\n");
|
||||||
printf(" -q quiet: do not output PCRE2 version number at start\n");
|
printf(" -q quiet: do not output PCRE2 version number at start\n");
|
||||||
printf(" -pattern <s> set default pattern modifier fields\n");
|
printf(" -pattern <s> set default pattern modifier fields\n");
|
||||||
printf(" -subject <s> set default subject modifier fields\n");
|
printf(" -subject <s> set default subject modifier fields\n");
|
||||||
|
@ -8431,6 +8433,166 @@ return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Format one property/script list item *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
static void
|
||||||
|
format_list_item(int16_t *ff, char *buff, BOOL isscript)
|
||||||
|
{
|
||||||
|
int count;
|
||||||
|
int maxi = 0;
|
||||||
|
const char *maxs = "";
|
||||||
|
size_t max = 0;
|
||||||
|
|
||||||
|
for (count = 0; ff[count] >= 0; count++) {}
|
||||||
|
|
||||||
|
/* Find the name to put first. For scripts, any 3-character name is chosen.
|
||||||
|
For non-scripts, or if there is no 3-character name, take the longest. */
|
||||||
|
|
||||||
|
for (int i = 0; ff[i] >= 0; i++)
|
||||||
|
{
|
||||||
|
const char *s = PRIV(utt_names) + ff[i];
|
||||||
|
size_t len = strlen(s);
|
||||||
|
if (isscript && len == 3)
|
||||||
|
{
|
||||||
|
maxi = i;
|
||||||
|
max = len;
|
||||||
|
maxs = s;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (len > max)
|
||||||
|
{
|
||||||
|
max = len;
|
||||||
|
maxi = i;
|
||||||
|
maxs = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
strcpy(buff, maxs);
|
||||||
|
buff += max;
|
||||||
|
|
||||||
|
if (count > 1)
|
||||||
|
{
|
||||||
|
const char *sep = " (";
|
||||||
|
for (int i = 0; i < count; i++)
|
||||||
|
{
|
||||||
|
if (i == maxi) continue;
|
||||||
|
buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]);
|
||||||
|
sep = ", ";
|
||||||
|
}
|
||||||
|
(void)sprintf(buff, ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Display scripts or properties *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#define MAX_SYNONYMS 5
|
||||||
|
|
||||||
|
static void
|
||||||
|
display_properties(BOOL wantscripts)
|
||||||
|
{
|
||||||
|
#ifndef SUPPORT_UNICODE
|
||||||
|
printf("** This version of PCRE2 was compiled without Unicode support.\n");
|
||||||
|
#else
|
||||||
|
|
||||||
|
const char *typename;
|
||||||
|
uint16_t seentypes[1024];
|
||||||
|
uint16_t seenvalues[1024];
|
||||||
|
int seencount = 0;
|
||||||
|
int16_t found[256][MAX_SYNONYMS + 1];
|
||||||
|
int fc = 0;
|
||||||
|
int colwidth = 40;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if (wantscripts)
|
||||||
|
{
|
||||||
|
n = ucp_Script_Count;
|
||||||
|
typename = "SCRIPTS";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
n = ucp_Bprop_Count;
|
||||||
|
typename = "PROPERTIES";
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < PRIV(utt_size); i++)
|
||||||
|
{
|
||||||
|
int k;
|
||||||
|
int m = 0;
|
||||||
|
int16_t *fv;
|
||||||
|
const ucp_type_table *t = PRIV(utt) + i;
|
||||||
|
unsigned int value = t->value;
|
||||||
|
|
||||||
|
if (wantscripts)
|
||||||
|
{
|
||||||
|
if (t->type != PT_SC && t->type != PT_SCX) continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (t->type != PT_BOOL) continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (k = 0; k < seencount; k++)
|
||||||
|
{
|
||||||
|
if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
|
||||||
|
}
|
||||||
|
if (k < seencount) continue;
|
||||||
|
|
||||||
|
seentypes[seencount] = t->type;
|
||||||
|
seenvalues[seencount++] = t->value;
|
||||||
|
|
||||||
|
fv = found[fc++];
|
||||||
|
fv[m++] = t->name_offset;
|
||||||
|
|
||||||
|
for (size_t j = i + 1; j < PRIV(utt_size); j++)
|
||||||
|
{
|
||||||
|
const ucp_type_table *tt = PRIV(utt) + j;
|
||||||
|
if (tt->type != t->type || tt->value != value) continue;
|
||||||
|
if (m >= MAX_SYNONYMS)
|
||||||
|
printf("** Too many synonyms: %s ignored\n",
|
||||||
|
PRIV(utt_names) + tt->name_offset);
|
||||||
|
else fv[m++] = tt->name_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
fv[m] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("-------------------------- SUPPORTED %s --------------------------\n\n",
|
||||||
|
typename);
|
||||||
|
|
||||||
|
if (!wantscripts) printf(
|
||||||
|
"This release of PCRE2 supports Unicode's general category properties such\n"
|
||||||
|
"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
|
||||||
|
"and the following binary (yes/no) properties:\n\n");
|
||||||
|
|
||||||
|
|
||||||
|
for (int k = 0; k < (n+1)/2; k++)
|
||||||
|
{
|
||||||
|
int x;
|
||||||
|
char buff1[128];
|
||||||
|
char buff2[128];
|
||||||
|
|
||||||
|
format_list_item(found[k], buff1, wantscripts);
|
||||||
|
x = k + (n+1)/2;
|
||||||
|
if (x < n) format_list_item(found[x], buff2, wantscripts);
|
||||||
|
else buff2[0] = 0;
|
||||||
|
|
||||||
|
x = printf("%s", buff1);
|
||||||
|
while (x++ < colwidth) printf(" ");
|
||||||
|
printf("%s\n", buff2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Display one modifier *
|
* Display one modifier *
|
||||||
|
@ -8445,8 +8607,8 @@ printf("%c%s", c, m->name);
|
||||||
for (size_t i = 0; i < C1MODLISTCOUNT; i++)
|
for (size_t i = 0; i < C1MODLISTCOUNT; i++)
|
||||||
{
|
{
|
||||||
if (strcmp(m->name, c1modlist[i].fullname) == 0)
|
if (strcmp(m->name, c1modlist[i].fullname) == 0)
|
||||||
printf(" (%c)", c1modlist[i].onechar);
|
printf(" (%c)", c1modlist[i].onechar);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -8505,19 +8667,19 @@ for (i = 0; i < MODLISTCOUNT; i++)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (for_pattern == is_pattern)
|
if (for_pattern == is_pattern)
|
||||||
{
|
{
|
||||||
extra[n] = 0;
|
extra[n] = 0;
|
||||||
for (size_t k = 0; k < C1MODLISTCOUNT; k++)
|
for (size_t k = 0; k < C1MODLISTCOUNT; k++)
|
||||||
{
|
{
|
||||||
if (strcmp(m->name, c1modlist[k].fullname) == 0)
|
if (strcmp(m->name, c1modlist[k].fullname) == 0)
|
||||||
{
|
{
|
||||||
extra[n] += 4;
|
extra[n] += 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
list[n++] = i;
|
list[n++] = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now print from the list in two columns. */
|
/* Now print from the list in two columns. */
|
||||||
|
@ -8672,6 +8834,22 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
|
||||||
goto EXIT;
|
goto EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* List properties and exit */
|
||||||
|
|
||||||
|
if (strcmp(arg, "-LP") == 0)
|
||||||
|
{
|
||||||
|
display_properties(FALSE);
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* List scripts and exit */
|
||||||
|
|
||||||
|
if (strcmp(arg, "-LS") == 0)
|
||||||
|
{
|
||||||
|
display_properties(TRUE);
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
/* Display and/or set return code for configuration options. */
|
/* Display and/or set return code for configuration options. */
|
||||||
|
|
||||||
if (strcmp(arg, "-C") == 0)
|
if (strcmp(arg, "-C") == 0)
|
||||||
|
|
Loading…
Reference in New Issue