Add -LP and -LS (list properties, list scripts) features to pcre2test.
This commit is contained in:
parent
68fbc1982e
commit
bf35c0518c
|
@ -39,7 +39,8 @@ pcre2_substitute(), and the replacement argument of the latter, if the pointer
|
|||
is NULL and the length is zero, treat as an empty string. Apparently a number
|
||||
of applications treat NULL/0 in this way.
|
||||
|
||||
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
|
||||
14. Added support for Bidi_Class and a number of binary Unicode properties,
|
||||
including Bidi_Control.
|
||||
|
||||
15. Fix some minor issues raised by clang sanitize.
|
||||
|
||||
|
@ -76,6 +77,8 @@ misaligned the frame that follows, resulting in an alignment fault when storing
|
|||
a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
|
||||
Clarke PR#72.
|
||||
|
||||
20. Added -LP and -LS listing options to pcre2test.
|
||||
|
||||
|
||||
Version 10.39 29-October-2021
|
||||
-----------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "28 November 2021" "PCRE 10.40"
|
||||
.TH PCRE2TEST 1 "12 January 2022" "PCRE 10.40"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified.
|
|||
\fB-LM\fP
|
||||
List modifiers: write a list of available pattern and subject modifiers to the
|
||||
standard output, then exit with zero exit code. All other options are ignored.
|
||||
If both -C and -LM are present, whichever is first is recognized.
|
||||
If both -C and any -Lx options are present, whichever is first is recognized.
|
||||
.TP 10
|
||||
\fB-LP\fP
|
||||
List properties: write a list of recognized Unicode properties to the standard
|
||||
output, then exit with zero exit code. All other options are ignored. If both
|
||||
-C and any -Lx options are present, whichever is first is recognized.
|
||||
.TP 10
|
||||
\fB-LS\fP
|
||||
List scripts: write a list of recognized Unicode script names to the standard
|
||||
output, then exit with zero exit code. All other options are ignored. If both
|
||||
-C and any -Lx options are present, whichever is first is recognized.
|
||||
.TP 10
|
||||
\fB-pattern\fP \fImodifier-list\fP
|
||||
Behave as if each pattern line contains the given modifiers.
|
||||
|
@ -2109,6 +2119,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 28 November 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
Last updated: 12 January 2022
|
||||
Copyright (c) 1997-2022 University of Cambridge.
|
||||
.fi
|
||||
|
|
178
src/pcre2test.c
178
src/pcre2test.c
|
@ -8251,6 +8251,8 @@ printf(" -jit set default pattern modifier 'jit'\n");
|
|||
printf(" -jitfast set default pattern modifier 'jitfast'\n");
|
||||
printf(" -jitverify set default pattern modifier 'jitverify'\n");
|
||||
printf(" -LM list pattern and subject modifiers, then exit\n");
|
||||
printf(" -LP list non-script properties, then exit\n");
|
||||
printf(" -LS list supported scripts, then exit\n");
|
||||
printf(" -q quiet: do not output PCRE2 version number at start\n");
|
||||
printf(" -pattern <s> set default pattern modifier fields\n");
|
||||
printf(" -subject <s> set default subject modifier fields\n");
|
||||
|
@ -8431,6 +8433,166 @@ return 0;
|
|||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Format one property/script list item *
|
||||
*************************************************/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
static void
|
||||
format_list_item(int16_t *ff, char *buff, BOOL isscript)
|
||||
{
|
||||
int count;
|
||||
int maxi = 0;
|
||||
const char *maxs = "";
|
||||
size_t max = 0;
|
||||
|
||||
for (count = 0; ff[count] >= 0; count++) {}
|
||||
|
||||
/* Find the name to put first. For scripts, any 3-character name is chosen.
|
||||
For non-scripts, or if there is no 3-character name, take the longest. */
|
||||
|
||||
for (int i = 0; ff[i] >= 0; i++)
|
||||
{
|
||||
const char *s = PRIV(utt_names) + ff[i];
|
||||
size_t len = strlen(s);
|
||||
if (isscript && len == 3)
|
||||
{
|
||||
maxi = i;
|
||||
max = len;
|
||||
maxs = s;
|
||||
break;
|
||||
}
|
||||
else if (len > max)
|
||||
{
|
||||
max = len;
|
||||
maxi = i;
|
||||
maxs = s;
|
||||
}
|
||||
}
|
||||
|
||||
strcpy(buff, maxs);
|
||||
buff += max;
|
||||
|
||||
if (count > 1)
|
||||
{
|
||||
const char *sep = " (";
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
if (i == maxi) continue;
|
||||
buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]);
|
||||
sep = ", ";
|
||||
}
|
||||
(void)sprintf(buff, ")");
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Display scripts or properties *
|
||||
*************************************************/
|
||||
|
||||
#define MAX_SYNONYMS 5
|
||||
|
||||
static void
|
||||
display_properties(BOOL wantscripts)
|
||||
{
|
||||
#ifndef SUPPORT_UNICODE
|
||||
printf("** This version of PCRE2 was compiled without Unicode support.\n");
|
||||
#else
|
||||
|
||||
const char *typename;
|
||||
uint16_t seentypes[1024];
|
||||
uint16_t seenvalues[1024];
|
||||
int seencount = 0;
|
||||
int16_t found[256][MAX_SYNONYMS + 1];
|
||||
int fc = 0;
|
||||
int colwidth = 40;
|
||||
int n;
|
||||
|
||||
if (wantscripts)
|
||||
{
|
||||
n = ucp_Script_Count;
|
||||
typename = "SCRIPTS";
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ucp_Bprop_Count;
|
||||
typename = "PROPERTIES";
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < PRIV(utt_size); i++)
|
||||
{
|
||||
int k;
|
||||
int m = 0;
|
||||
int16_t *fv;
|
||||
const ucp_type_table *t = PRIV(utt) + i;
|
||||
unsigned int value = t->value;
|
||||
|
||||
if (wantscripts)
|
||||
{
|
||||
if (t->type != PT_SC && t->type != PT_SCX) continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (t->type != PT_BOOL) continue;
|
||||
}
|
||||
|
||||
for (k = 0; k < seencount; k++)
|
||||
{
|
||||
if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
|
||||
}
|
||||
if (k < seencount) continue;
|
||||
|
||||
seentypes[seencount] = t->type;
|
||||
seenvalues[seencount++] = t->value;
|
||||
|
||||
fv = found[fc++];
|
||||
fv[m++] = t->name_offset;
|
||||
|
||||
for (size_t j = i + 1; j < PRIV(utt_size); j++)
|
||||
{
|
||||
const ucp_type_table *tt = PRIV(utt) + j;
|
||||
if (tt->type != t->type || tt->value != value) continue;
|
||||
if (m >= MAX_SYNONYMS)
|
||||
printf("** Too many synonyms: %s ignored\n",
|
||||
PRIV(utt_names) + tt->name_offset);
|
||||
else fv[m++] = tt->name_offset;
|
||||
}
|
||||
|
||||
fv[m] = -1;
|
||||
}
|
||||
|
||||
printf("-------------------------- SUPPORTED %s --------------------------\n\n",
|
||||
typename);
|
||||
|
||||
if (!wantscripts) printf(
|
||||
"This release of PCRE2 supports Unicode's general category properties such\n"
|
||||
"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
|
||||
"and the following binary (yes/no) properties:\n\n");
|
||||
|
||||
|
||||
for (int k = 0; k < (n+1)/2; k++)
|
||||
{
|
||||
int x;
|
||||
char buff1[128];
|
||||
char buff2[128];
|
||||
|
||||
format_list_item(found[k], buff1, wantscripts);
|
||||
x = k + (n+1)/2;
|
||||
if (x < n) format_list_item(found[x], buff2, wantscripts);
|
||||
else buff2[0] = 0;
|
||||
|
||||
x = printf("%s", buff1);
|
||||
while (x++ < colwidth) printf(" ");
|
||||
printf("%s\n", buff2);
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Display one modifier *
|
||||
|
@ -8672,6 +8834,22 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
|
|||
goto EXIT;
|
||||
}
|
||||
|
||||
/* List properties and exit */
|
||||
|
||||
if (strcmp(arg, "-LP") == 0)
|
||||
{
|
||||
display_properties(FALSE);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* List scripts and exit */
|
||||
|
||||
if (strcmp(arg, "-LS") == 0)
|
||||
{
|
||||
display_properties(TRUE);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Display and/or set return code for configuration options. */
|
||||
|
||||
if (strcmp(arg, "-C") == 0)
|
||||
|
|
Loading…
Reference in New Issue