Add -LP and -LS (list properties, list scripts) features to pcre2test.

This commit is contained in:
Philip Hazel 2022-01-12 15:01:14 +00:00
parent 68fbc1982e
commit bf35c0518c
3 changed files with 214 additions and 23 deletions

View File

@ -39,7 +39,8 @@ pcre2_substitute(), and the replacement argument of the latter, if the pointer
is NULL and the length is zero, treat as an empty string. Apparently a number
of applications treat NULL/0 in this way.
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
14. Added support for Bidi_Class and a number of binary Unicode properties,
including Bidi_Control.
15. Fix some minor issues raised by clang sanitize.
@ -76,6 +77,8 @@ misaligned the frame that follows, resulting in an alignment fault when storing
a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
Clarke PR#72.
20. Added -LP and -LS listing options to pcre2test.
Version 10.39 29-October-2021
-----------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "28 November 2021" "PCRE 10.40"
.TH PCRE2TEST 1 "12 January 2022" "PCRE 10.40"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified.
\fB-LM\fP
List modifiers: write a list of available pattern and subject modifiers to the
standard output, then exit with zero exit code. All other options are ignored.
If both -C and -LM are present, whichever is first is recognized.
If both -C and any -Lx options are present, whichever is first is recognized.
.TP 10
\fB-LP\fP
List properties: write a list of recognized Unicode properties to the standard
output, then exit with zero exit code. All other options are ignored. If both
-C and any -Lx options are present, whichever is first is recognized.
.TP 10
\fB-LS\fP
List scripts: write a list of recognized Unicode script names to the standard
output, then exit with zero exit code. All other options are ignored. If both
-C and any -Lx options are present, whichever is first is recognized.
.TP 10
\fB-pattern\fP \fImodifier-list\fP
Behave as if each pattern line contains the given modifiers.
@ -2109,6 +2119,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 28 November 2021
Copyright (c) 1997-2021 University of Cambridge.
Last updated: 12 January 2022
Copyright (c) 1997-2022 University of Cambridge.
.fi

View File

@ -8251,6 +8251,8 @@ printf(" -jit set default pattern modifier 'jit'\n");
printf(" -jitfast set default pattern modifier 'jitfast'\n");
printf(" -jitverify set default pattern modifier 'jitverify'\n");
printf(" -LM list pattern and subject modifiers, then exit\n");
printf(" -LP list non-script properties, then exit\n");
printf(" -LS list supported scripts, then exit\n");
printf(" -q quiet: do not output PCRE2 version number at start\n");
printf(" -pattern <s> set default pattern modifier fields\n");
printf(" -subject <s> set default subject modifier fields\n");
@ -8431,6 +8433,166 @@ return 0;
}
/*************************************************
*      Format one property/script list item      *
*************************************************/

/* Build the display text for one property or script. The text consists of a
leading name followed, when the item has more than one name, by the remaining
names in parentheses, separated by commas, e.g. "lead (other1, other2)".

Arguments:
  ff         list of offsets into PRIV(utt_names), ended by a negative value
  buff       where to put the zero-terminated text; no size is passed, so the
               caller must provide a buffer big enough for all the names
  isscript   TRUE if the first 3-character name is to be used as the leading
               name; otherwise (or if there is none) the longest name leads

Returns:     nothing
*/

#ifdef SUPPORT_UNICODE
static void
format_list_item(int16_t *ff, char *buff, BOOL isscript)
{
int total = 0;
int lead = 0;
size_t leadlen = 0;
const char *leadname = "";
const char *separator = " (";

/* Count the names; the list is terminated by a negative offset. */

while (ff[total] >= 0) total++;

/* Choose the leading name. For a script, the first name that is exactly three
characters long wins outright. In all other cases the longest name is used,
with the earliest one winning a tie. */

for (int i = 0; i < total; i++)
  {
  const char *name = PRIV(utt_names) + ff[i];
  size_t namelen = strlen(name);

  if (isscript && namelen == 3)
    {
    lead = i;
    leadlen = namelen;
    leadname = name;
    break;
    }

  if (namelen > leadlen)
    {
    lead = i;
    leadlen = namelen;
    leadname = name;
    }
  }

strcpy(buff, leadname);
buff += leadlen;

/* A single name has no synonyms, so there is no parenthesized list. */

if (total <= 1) return;

/* Append every other name, in table order, skipping the leading one. */

for (int i = 0; i < total; i++)
  {
  if (i == lead) continue;
  buff += sprintf(buff, "%s%s", separator, PRIV(utt_names) + ff[i]);
  separator = ", ";
  }

(void)sprintf(buff, ")");
}
#endif  /* SUPPORT_UNICODE */
/*************************************************
*        Display scripts or properties           *
*************************************************/

/* Write a two-column list of either the supported Unicode scripts or the
supported binary properties to the standard output. The PRIV(utt) table is
scanned; entries that share the same type and value are treated as synonyms
of one item and are shown together on one line by format_list_item().

When PCRE2 is built without SUPPORT_UNICODE, a one-line message is written
instead.

Argument:
  wantscripts  TRUE to list scripts (PT_SC and PT_SCX entries);
               FALSE to list binary properties (PT_BOOL entries)

Returns:       nothing
*/

#define MAX_SYNONYMS 5

static void
display_properties(BOOL wantscripts)
{
#ifndef SUPPORT_UNICODE
(void)wantscripts;   /* Not used in this configuration; avoid a warning */
printf("** This version of PCRE2 was compiled without Unicode support.\n");
#else

const char *typename;
uint16_t seentypes[1024];
uint16_t seenvalues[1024];
int seencount = 0;
int16_t found[256][MAX_SYNONYMS + 1];
const int maxfound = (int)(sizeof(found)/sizeof(found[0]));
int fc = 0;
int colwidth = 40;
int n;
int half;

if (wantscripts)
  {
  n = ucp_Script_Count;
  typename = "SCRIPTS";
  }
else
  {
  n = ucp_Bprop_Count;
  typename = "PROPERTIES";
  }

/* Collect one row in found[] for each distinct (type, value) pair of the
wanted kind. Each row holds the name offsets of all the synonyms, ended
by -1. */

for (size_t i = 0; i < PRIV(utt_size); i++)
  {
  int k;
  int m = 0;
  int16_t *fv;
  const ucp_type_table *t = PRIV(utt) + i;
  unsigned int value = t->value;

  if (wantscripts)
    {
    if (t->type != PT_SC && t->type != PT_SCX) continue;
    }
  else
    {
    if (t->type != PT_BOOL) continue;
    }

  /* Skip an entry whose (type, value) has already been handled as the
  synonym of an earlier entry. */

  for (k = 0; k < seencount; k++)
    {
    if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
    }
  if (k < seencount) continue;

  /* Do not run off the end of found[]. As seencount and fc are always
  incremented together, this check also keeps the (larger) seentypes[] and
  seenvalues[] vectors within bounds. */

  if (fc >= maxfound)
    {
    printf("** Too many items to list: %s ignored\n",
      PRIV(utt_names) + t->name_offset);
    continue;
    }

  seentypes[seencount] = t->type;
  seenvalues[seencount++] = t->value;

  fv = found[fc++];
  fv[m++] = t->name_offset;

  /* Gather the later entries that are synonyms of this one. */

  for (size_t j = i + 1; j < PRIV(utt_size); j++)
    {
    const ucp_type_table *tt = PRIV(utt) + j;
    if (tt->type != t->type || tt->value != value) continue;
    if (m >= MAX_SYNONYMS)
      printf("** Too many synonyms: %s ignored\n",
        PRIV(utt_names) + tt->name_offset);
    else fv[m++] = tt->name_offset;
    }

  fv[m] = -1;
  }

/* Only the first fc rows of found[] have been filled in. If the expected
count is larger than that, reduce it so that uninitialized rows are never
passed to format_list_item(). */

if (n > fc) n = fc;

printf("-------------------------- SUPPORTED %s --------------------------\n\n",
  typename);

if (!wantscripts) printf(
"This release of PCRE2 supports Unicode's general category properties such\n"
"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
"and the following binary (yes/no) properties:\n\n");

/* Output in two columns: the first half of the items goes down the left-hand
side and the second half down the right. When n is odd, the last line has an
empty right-hand column. */

half = (n + 1)/2;

for (int k = 0; k < half; k++)
  {
  int right = k + half;
  int width;
  char buff1[128];
  char buff2[128];

  format_list_item(found[k], buff1, wantscripts);
  if (right < n) format_list_item(found[right], buff2, wantscripts);
    else buff2[0] = 0;

  /* Pad the left-hand item with spaces up to the column width. */

  width = printf("%s", buff1);
  while (width++ < colwidth) printf(" ");
  printf("%s\n", buff2);
  }

#endif  /* SUPPORT_UNICODE */
}
/*************************************************
* Display one modifier *
@ -8672,6 +8834,22 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
goto EXIT;
}
/* List properties and exit */
if (strcmp(arg, "-LP") == 0)
{
display_properties(FALSE);
goto EXIT;
}
/* List scripts and exit */
if (strcmp(arg, "-LS") == 0)
{
display_properties(TRUE);
goto EXIT;
}
/* Display and/or set return code for configuration options. */
if (strcmp(arg, "-C") == 0)