Tidy pcre2demo.c

2016-02-02 16:25:47 +00:00 · 2016-02-02 16:25:47 +00:00 · 4e67c0c9e9
parent 6c1c817438
commit 4e67c0c9e9
12 changed files with 1116 additions and 1020 deletions
--- a/3
+++ b/3
@ -34,6 +34,9 @@ posix_nosub, to call regcomp() with REG_NOSUB. Previously the no_auto_capture
 modifier had this effect. That option is now ignored when the POSIX API is in 
 use.

+8. Minor tidies to the pcre2demo.c sample program, including more comments 
+about its 8-bit-ness.
+

 Version 10.21 12-January-2016
 -----------------------------
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1282,7 +1282,9 @@ If this option is set, it disables the use of numbered capturing parentheses in
 the pattern. Any opening parenthesis that is not followed by ? behaves as if it
 were followed by ?: but named parentheses can still be used for capturing (and
 they acquire numbers in the usual way). There is no equivalent of this option
-in Perl.
+in Perl. Note that, if this option is set, references to capturing groups (back 
+references or recursion/subroutine calls) may only refer to named groups, 
+though the reference can be by name or by number.
 <pre>
  PCRE2_NO_AUTO_POSSESS
 </pre>
@ -3121,9 +3123,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC40" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 16 December 2015
+Last updated: 31 January 2016
 <br>
-Copyright &copy; 1997-2015 University of Cambridge.
+Copyright &copy; 1997-2016 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2demo.html
+++ b/doc/html/pcre2demo.html
@ -20,28 +20,31 @@ please consult the man page, in case the conversion went wrong.
 *************************************************/

 /* This is a demonstration program to illustrate a straightforward way of
-calling the PCRE2 regular expression library from a C program. See the
+using the PCRE2 regular expression library from a C program. See the
 pcre2sample documentation for a short discussion ("man pcre2sample" if you have
 the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
 incompatible with the original PCRE API.

 There are actually three libraries, each supporting a different code unit
-width. This demonstration program uses the 8-bit library.
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.

 In Unix-like environments, if PCRE2 is installed in your standard system
 libraries, you should be able to compile this program using this command:

-gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo

 If PCRE2 is not installed in a standard place, it is likely to be installed
 with support for the pkg-config mechanism. If you have pkg-config, you can
 compile this program using this command:

-gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo

-If you do not have pkg-config, you may have to use this:
+If you do not have pkg-config, you may have to use something like this:

-gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
  -R/usr/local/lib -lpcre2-8 -o pcre2demo

 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
@ -56,9 +59,14 @@ the following line. */

 /* #define PCRE2_STATIC */

-/* This macro must be defined before including pcre2.h. For a program that uses
-only one code unit width, it makes it possible to use generic function names
-such as pcre2_compile(). */
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */

 #define PCRE2_CODE_UNIT_WIDTH 8

@ -79,19 +87,19 @@ int main(int argc, char **argv)
 {
 pcre2_code *re;
 PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
-PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */
 PCRE2_SPTR name_table;

 int crlf_is_newline;
 int errornumber;
 int find_all;
 int i;
-int namecount;
-int name_entry_size;
 int rc;
 int utf8;

 uint32_t option_bits;
+uint32_t namecount;
+uint32_t name_entry_size;
 uint32_t newline;

 PCRE2_SIZE erroroffset;
@ -106,14 +114,18 @@ pcre2_match_data *match_data;
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
 * like Perl's /g option. We set the variable find_all to a non-zero value *
-* if the -g option is present. Apart from that, there must be exactly two *
-* arguments.                                                              *
+* if the -g option is present.                                            *
 **************************************************************************/

 find_all = 0;
 for (i = 1; i &lt; argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+  else if (argv[i][0] == '-')
+    {
+    printf("Unrecognised option %s\n", argv[i]);
+    return 1;
+    }
  else break;
  }

@ -122,7 +134,7 @@ and the subject string. */

 if (argc - i != 2)
  {
-  printf("Two arguments required: a regex and a subject string\n");
+  printf("Exactly two arguments required: a regex and a subject string\n");
  return 1;
  }

@ -201,7 +213,7 @@ if (rc &lt; 0)
 stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
-printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
+printf("Match succeeded at offset %d\n", (int)ovector[0]);


 /*************************************************************************
@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. */
  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  &amp;namecount);          /* where to put the answer */

-if (namecount &lt;= 0) printf("No named substrings\n"); else
+if (namecount == 0) printf("No named substrings\n"); else
  {
  PCRE2_SPTR tabptr;
  printf("Named substrings\n");
@ -371,7 +383,7 @@ for (;;)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one code unit */
-    if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
+    if (crlf_is_newline &amp;&amp;                      /* If CRLF is a newline &amp; */
        start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
        subject[start_offset] == '\r' &amp;&amp;
        subject[start_offset + 1] == '\n')
@ -417,7 +429,7 @@ for (;;)
    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
    }

-  if (namecount &lt;= 0) printf("No named substrings\n"); else
+  if (namecount == 0) printf("No named substrings\n"); else
    {
    PCRE2_SPTR tabptr = name_table;
    printf("Named substrings\n");
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -1258,7 +1258,7 @@ PCRE2 does not allow \C to appear in lookbehind assertions
 <a href="#lookbehind">(described below)</a>
 in a UTF mode, because this would make it impossible to calculate the length of
 the lookbehind. Neither the alternative matching function
-<b>pcre2_dfa_match()</b> not the JIT optimizer support \C in a UTF mode. The
+<b>pcre2_dfa_match()</b> nor the JIT optimizer support \C in a UTF mode. The
 former gives a match-time error; the latter fails to optimize and so the match
 is always run using the interpreter.
 </P>
--- a/doc/html/pcre2posix.html
+++ b/doc/html/pcre2posix.html
@ -48,7 +48,7 @@ This set of functions provides a POSIX-style API for the PCRE2 regular
 expression 8-bit library. See the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation for a description of PCRE2's native API, which contains much
-additional functionality. There is no POSIX-style wrapper for PCRE2's 16-bit
+additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit
 and 32-bit libraries.
 </P>
 <P>
@ -67,9 +67,9 @@ POSIX interface often use it, this makes it easier to slot in PCRE2 as a
 replacement library. Other POSIX options are not even defined.
 </P>
 <P>
-There are also some other options that are not defined by POSIX. These have
-been added at the request of users who want to make use of certain
-PCRE2-specific features via the POSIX calling interface.
+There are also some options that are not defined by POSIX. These have been
+added at the request of users who want to make use of certain PCRE2-specific
+features via the POSIX calling interface.
 </P>
 <P>
 When PCRE2 is called via these functions, it is only the API that is POSIX-like
@ -119,11 +119,11 @@ defined POSIX behaviour for REG_NEWLINE (see the following section).
 <pre>
  REG_NOSUB
 </pre>
-The PCRE2_NO_AUTO_CAPTURE option is set when the regular expression is passed
-for compilation to the native function. In addition, when a pattern that is
-compiled with this flag is passed to <b>regexec()</b> for matching, the
-<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
-are returned.
+When a pattern that is compiled with this flag is passed to <b>regexec()</b> for
+matching, the <i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no
+captured strings are returned. Versions of the PCRE2 library prior to 10.22 used
+to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens 
+because it disables the use of back references.
 <pre>
  REG_UCP
 </pre>
@ -241,11 +241,12 @@ mutually exclusive; the error REG_INVARG is returned.
 <P>
 If the pattern was compiled with the REG_NOSUB flag, no data about any matched
 strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
-<b>regexec()</b> are ignored.
+<b>regexec()</b> are ignored (except possibly as input for REG_STARTEND).
 </P>
 <P>
-If the value of <i>nmatch</i> is zero, or if the value <i>pmatch</i> is NULL,
-no data about any matched strings is returned.
+The value of <i>nmatch</i> may be zero, and the value of <i>pmatch</i> may be NULL
+(unless REG_STARTEND is set); in both these cases no data about any matched
+strings is returned.
 </P>
 <P>
 Otherwise, the portion of the string that was matched, and also any captured
@ -290,9 +291,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC9" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 29 November 2015
+Last updated: 31 January 2016
 <br>
-Copyright &copy; 1997-2015 University of Cambridge.
+Copyright &copy; 1997-2016 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2sample.html
+++ b/doc/html/pcre2sample.html
@ -24,12 +24,11 @@ documentation. If you do not have a copy of the PCRE2 distribution, you can
 save this listing to re-create the contents of <i>pcre2demo.c</i>.
 </P>
 <P>
-The demonstration program, which uses the PCRE2 8-bit library, compiles the
-regular expression that is its first argument, and matches it against the
-subject string in its second argument. No PCRE2 options are set, and default
-character tables are used. If matching succeeds, the program outputs the
-portion of the subject that matched, together with the contents of any captured
-substrings.
+The demonstration program compiles the regular expression that is its
+first argument, and matches it against the subject string in its second
+argument. No PCRE2 options are set, and default character tables are used. If
+matching succeeds, the program outputs the portion of the subject that matched,
+together with the contents of any captured substrings.
 </P>
 <P>
 If the -g option is given on the command line, the program then goes on to
@ -38,34 +37,39 @@ string. The logic is a little bit tricky because of the possibility of matching
 an empty string. Comments in the code explain what is going on.
 </P>
 <P>
+The code in <b>pcre2demo.c</b> is an 8-bit program that uses the PCRE2 8-bit
+library. It handles strings and characters that are stored in 8-bit code units.
+By default, one character corresponds to one code unit, but if the pattern 
+starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, 
+where characters may occupy multiple code units.
+</P>
+<P>
 If PCRE2 is installed in the standard include and library directories for your
 operating system, you should be able to compile the demonstration program using
-this command:
+a command like this:
 <pre>
-  gcc -o pcre2demo pcre2demo.c -lpcre2-8
+  cc -o pcre2demo pcre2demo.c -lpcre2-8
 </pre>
 If PCRE2 is installed elsewhere, you may need to add additional options to the
 command line. For example, on a Unix-like system that has PCRE2 installed in
 <i>/usr/local</i>, you can compile the demonstration program using a command
 like this:
 <pre>
-  gcc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8
-
-</PRE>
-</P>
-<P>
-Once you have compiled and linked the demonstration program, you can run simple
-tests like this:
+  cc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8
+</pre>
+Once you have built the demonstration program, you can run simple tests like
+this:
 <pre>
  ./pcre2demo 'cat|dog' 'the cat sat on the mat'
  ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'
 </pre>
 Note that there is a much more comprehensive test program, called
 <a href="pcre2test.html"><b>pcre2test</b>,</a>
-which supports many more facilities for testing regular expressions using the
-PCRE2 libraries. The
+which supports many more facilities for testing regular expressions using all
+three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be
+installed). The
 <a href="pcre2demo.html"><b>pcre2demo</b></a>
-program is provided as a simple coding example.
+program is provided as a relatively simple coding example.
 </P>
 <P>
 If you try to run
@ -73,7 +77,7 @@ If you try to run
 when PCRE2 is not installed in the standard library directory, you may get an
 error like this on some operating systems (e.g. Solaris):
 <pre>
-  ld.so.1: a.out: fatal: libpcre2.so.0: open failed: No such file or directory
+  ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory
 </pre>
 This is caused by the way shared library support works on those systems. You
 need to add
@ -97,9 +101,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 20 October 2014
+Last updated: 02 February 2016
 <br>
-Copyright &copy; 1997-2014 University of Cambridge.
+Copyright &copy; 1997-2016 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -98,10 +98,11 @@ further data is read.
 </P>
 <P>
 For maximum portability, therefore, it is safest to avoid non-printing
-characters in <b>pcre2test</b> input files. There is a facility for specifying a
-pattern's characters as hexadecimal pairs, thus making it possible to include
-binary zeroes in a pattern for testing purposes. Subject lines are processed
-for backslash escapes, which makes it possible to include any data value.
+characters in <b>pcre2test</b> input files. There is a facility for specifying 
+some or all of a pattern's characters as hexadecimal pairs, thus making it
+possible to include binary zeroes in a pattern for testing purposes. Subject
+lines are processed for backslash escapes, which makes it possible to include
+any data value.
 </P>
 <br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
 <P>
@ -559,7 +560,7 @@ about the pattern:
      debug                     same as info,fullbincode
      fullbincode               show binary code with lengths
  /I  info                      show info about compiled pattern
-      hex                       pattern is coded in hexadecimal
+      hex                       unquoted characters are hexadecimal
      jit[=&#60;number&#62;]            use JIT
      jitfast                   use JIT fast path
      jitverify                 verify JIT use
@ -570,6 +571,7 @@ about the pattern:
      null_context              compile with a NULL context
      parens_nest_limit=&#60;n&#62;     set maximum parentheses depth
      posix                     use the POSIX API
+      posix_nosub               use the POSIX API with REG_NOSUB 
      push                      push compiled pattern onto the stack
      stackguard=&#60;number&#62;       test the stackguard feature
      tables=[0|1|2]            select internal tables
@ -655,20 +657,31 @@ testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses
 default values).
 </P>
 <br><b>
-Specifying a pattern in hex
+Specifying pattern characters in hexadecimal
 </b><br>
 <P>
-The <b>hex</b> modifier specifies that the characters of the pattern are to be
-interpreted as pairs of hexadecimal digits. White space is permitted between
-pairs. For example:
+The <b>hex</b> modifier specifies that the characters of the pattern, except for 
+substrings enclosed in single or double quotes, are to be interpreted as pairs
+of hexadecimal digits. This feature is provided as a way of creating patterns
+that contain binary zeros and other non-printing characters. White space is
+permitted between pairs of digits. For example, this pattern contains three 
+characters:
 <pre>
  /ab 32 59/hex
 </pre>
-This feature is provided as a way of creating patterns that contain binary zero
-and other non-printing characters. By default, <b>pcre2test</b> passes patterns
-as zero-terminated strings to <b>pcre2_compile()</b>, giving the length as
-PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the
-actual length of the pattern is passed.
+Parts of such a pattern are taken literally if quoted. This pattern contains 
+nine characters, only two of which are specified in hexadecimal:
+<pre>
+  /ab "literal" 32/hex
+</pre>
+Either single or double quotes may be used. There is no way of including
+the quote delimiter within a quoted substring.
+</P>
+<P>
+By default, <b>pcre2test</b> passes patterns as zero-terminated strings to
+<b>pcre2_compile()</b>, giving the length as PCRE2_ZERO_TERMINATED. However, for
+patterns specified with the <b>hex</b> modifier, the actual length of the
+pattern is passed.
 </P>
 <br><b>
 Generating long repetitive patterns
@ -821,16 +834,17 @@ variable can hold (essentially unlimited).
 Using the POSIX wrapper API
 </b><br>
 <P>
-The <b>/posix</b> modifier causes <b>pcre2test</b> to call PCRE2 via the POSIX
-wrapper API rather than its native API. This supports only the 8-bit library.
-Note that it does not imply POSIX matching semantics; for more detail see the
+The <b>/posix</b> and <b>posix_nosub</b> modifiers cause <b>pcre2test</b> to call
+PCRE2 via the POSIX wrapper API rather than its native API. When
+<b>posix_nosub</b> is used, the POSIX option REG_NOSUB is passed to
+<b>regcomp()</b>. The POSIX wrapper supports only the 8-bit library. Note that
+it does not imply POSIX matching semantics; for more detail see the
 <a href="pcre2posix.html"><b>pcre2posix</b></a>
-documentation. When the POSIX API is being used, the following pattern
-modifiers set options for the <b>regcomp()</b> function:
+documentation. The following pattern modifiers set options for the
+<b>regcomp()</b> function:
 <pre>
  caseless           REG_ICASE
  multiline          REG_NEWLINE
-  no_auto_capture    REG_NOSUB
  dotall             REG_DOTALL     )
  ungreedy           REG_UNGREEDY   ) These options are not part of
  ucp                REG_UCP        )   the POSIX standard
@ -847,7 +861,8 @@ large buffer is used.
 </P>
 <P>
 The <b>aftertext</b> and <b>allaftertext</b> subject modifiers work as described
-below. All other modifiers cause an error.
+below. All other modifiers are either ignored, with a warning message, or cause
+an error.
 </P>
 <br><b>
 Testing the stack guard feature
@ -957,7 +972,7 @@ If the <b>/posix</b> modifier was present on the pattern, causing the POSIX
 wrapper API to be used, the only option-setting modifiers that have any effect
 are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
 REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
-Any other modifiers cause an error.
+The other modifiers are ignored, with a warning message.
 </P>
 <br><b>
 Setting match controls
@ -1001,7 +1016,10 @@ pattern.
      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
      zero_terminate             pass the subject as zero-terminated
 </pre>
-The effects of these modifiers are described in the following sections.
+The effects of these modifiers are described in the following sections. When 
+matching via the POSIX wrapper API, the <b>aftertext</b>, <b>allaftertext</b>,
+and <b>ovector</b> subject modifiers work as described below. All other
+modifiers are either ignored, with a warning message, or cause an error.
 </P>
 <br><b>
 Showing more text
@ -1625,7 +1643,7 @@ usual by an empty line or end of file. This command may be followed by a
 modifier list containing only
 <a href="#controlmodifiers">control modifiers</a>
 that act after a pattern has been compiled. In particular, <b>hex</b>,
-<b>posix</b>, and <b>push</b> are not allowed, nor are any
+<b>posix</b>, <b>posix_nosub</b>, and <b>push</b> are not allowed, nor are any
 <a href="#optionmodifiers">option-setting modifiers.</a>
 The JIT modifiers are, however permitted. Here is an example that saves and
 reloads two patterns.
@ -1660,9 +1678,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 12 December 2015
+Last updated: 31 January 2016
 <br>
-Copyright &copy; 1997-2015 University of Cambridge.
+Copyright &copy; 1997-2016 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -1326,7 +1326,10 @@ COMPILING A PATTERN
       theses  in the pattern. Any opening parenthesis that is not followed by
       ? behaves as if it were followed by ?: but named parentheses can  still
       be  used  for  capturing  (and  they acquire numbers in the usual way).
-       There is no equivalent of this option in Perl.
+       There is no equivalent of this option  in  Perl.  Note  that,  if  this
+       option  is  set,  references  to  capturing  groups (back references or
+       recursion/subroutine calls) may only refer to named groups, though  the
+       reference can be by name or by number.

         PCRE2_NO_AUTO_POSSESS

@ -3055,8 +3058,8 @@ AUTHOR

 REVISION

-       Last updated: 16 December 2015
-       Copyright (c) 1997-2015 University of Cambridge.
+       Last updated: 31 January 2016
+       Copyright (c) 1997-2016 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -6231,7 +6234,7 @@ MATCHING A SINGLE CODE UNIT
       PCRE2  does  not allow \C to appear in lookbehind assertions (described
       below) in a UTF mode, because this would make it impossible  to  calcu-
       late  the  length  of  the lookbehind. Neither the alternative matching
-       function pcre2_dfa_match() not the JIT optimizer support \C  in  a  UTF
+       function pcre2_dfa_match() nor the JIT optimizer support \C  in  a  UTF
       mode. The former gives a match-time error; the latter fails to optimize
       and so the match is always run using the interpreter.

@ -8460,7 +8463,7 @@ DESCRIPTION
       This  set of functions provides a POSIX-style API for the PCRE2 regular
       expression 8-bit library. See the pcre2api documentation for a descrip-
       tion  of PCRE2's native API, which contains much additional functional-
-       ity. There is no POSIX-style wrapper  for  PCRE2's  16-bit  and  32-bit
+       ity. There are no POSIX-style wrappers for PCRE2's  16-bit  and  32-bit
       libraries.

       The functions described here are just wrapper functions that ultimately
@ -8478,8 +8481,8 @@ DESCRIPTION
       easier  to  slot in PCRE2 as a replacement library. Other POSIX options
       are not even defined.

-       There are also some other options that are not defined by POSIX.  These
-       have been added at the request of users who want to make use of certain
+       There are also some options that are not defined by POSIX.  These  have
+       been  added  at  the  request  of users who want to make use of certain
       PCRE2-specific features via the POSIX calling interface.

       When PCRE2 is called via these functions, it is only the  API  that  is
@ -8530,11 +8533,11 @@ COMPILING A PATTERN

         REG_NOSUB

-       The  PCRE2_NO_AUTO_CAPTURE option is set when the regular expression is
-       passed for compilation to the native function. In addition, when a pat-
-       tern  that is compiled with this flag is passed to regexec() for match-
-       ing, the nmatch and pmatch  arguments  are  ignored,  and  no  captured
-       strings are returned.
+       When  a  pattern that is compiled with this flag is passed to regexec()
+       for matching, the nmatch and pmatch arguments are ignored, and no  cap-
+       tured  strings  are  returned.  Versions  of the PCRE2 library prior to
+       10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no
+       longer happens because it disables the use of back references.

         REG_UCP

@ -8653,17 +8656,18 @@ MATCHING A PATTERN

       If the pattern was compiled with the REG_NOSUB flag, no data about  any
       matched  strings  is  returned.  The  nmatch  and  pmatch  arguments of
-       regexec() are ignored.
+       regexec() are ignored (except possibly as input for REG_STARTEND).

-       If the value of nmatch is zero, or if the value pmatch is NULL, no data
-       about any matched strings is returned.
+       The value of nmatch may be zero, and  the  value  pmatch  may  be  NULL
+       (unless  REG_STARTEND  is  set);  in both these cases no data about any
+       matched strings is returned.

-       Otherwise,the portion of the string that was matched, and also any cap-
-       tured substrings, are returned via the pmatch argument, which points to
-       an  array  of nmatch structures of type regmatch_t, containing the mem-
-       bers rm_so and rm_eo. These contain the byte offset to the first  char-
-       acter of each substring and the offset to the first character after the
-       end of each substring, respectively. The  0th  element  of  the  vector
+       Otherwise, the portion of the string that was  matched,  and  also  any
+       captured substrings, are returned via the pmatch argument, which points
+       to an array of nmatch structures of  type  regmatch_t,  containing  the
+       members  rm_so  and  rm_eo.  These contain the byte offset to the first
+       character of each substring and the offset to the first character after
+       the  end of each substring, respectively. The 0th element of the vector
       relates to the entire portion of string that  was  matched;  subsequent
       elements relate to the capturing subpatterns of the regular expression.
       Unused entries in the array have both structure members set to -1.
@ -8702,8 +8706,8 @@ AUTHOR

 REVISION

-       Last updated: 29 November 2015
-       Copyright (c) 1997-2015 University of Cambridge.
+       Last updated: 31 January 2016
+       Copyright (c) 1997-2016 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -8722,12 +8726,12 @@ PCRE2 SAMPLE PROGRAM
       documentation. If you do not have a copy of the PCRE2 distribution, you
       can save this listing to re-create the contents of pcre2demo.c.

-       The demonstration program, which uses the PCRE2 8-bit library, compiles
-       the regular expression that is  its  first  argument,  and  matches  it
-       against the subject string in its second argument. No PCRE2 options are
-       set, and default character tables are used. If matching  succeeds,  the
-       program  outputs the portion of the subject that matched, together with
-       the contents of any captured substrings.
+       The  demonstration  program compiles the regular expression that is its
+       first argument, and matches it against the subject string in its second
+       argument.  No  PCRE2  options are set, and default character tables are
+       used. If matching succeeds, the program outputs the portion of the sub-
+       ject  that  matched,  together  with  the contents of any captured sub-
+       strings.

       If the -g option is given on the command line, the program then goes on
       to check for further matches of the same regular expression in the same
@ -8735,38 +8739,45 @@ PCRE2 SAMPLE PROGRAM
       bility  of  matching an empty string. Comments in the code explain what
       is going on.

+       The code in pcre2demo.c is an 8-bit program that uses the  PCRE2  8-bit
+       library.  It  handles  strings  and characters that are stored in 8-bit
+       code units.  By default, one character corresponds to  one  code  unit,
+       but  if  the  pattern starts with "(*UTF)", both it and the subject are
+       treated as UTF-8 strings, where characters  may  occupy  multiple  code
+       units.
+
       If  PCRE2  is installed in the standard include and library directories
       for your operating system, you should be able to compile the demonstra-
-       tion program using this command:
+       tion program using a command like this:

-         gcc -o pcre2demo pcre2demo.c -lpcre2-8
+         cc -o pcre2demo pcre2demo.c -lpcre2-8

       If PCRE2 is installed elsewhere, you may need to add additional options
       to the command line. For example, on a Unix-like system that has  PCRE2
       installed  in  /usr/local,  you  can  compile the demonstration program
       using a command like this:

-         gcc -o pcre2demo -I/usr/local/include pcre2demo.c \
+         cc -o pcre2demo -I/usr/local/include pcre2demo.c \
            -L/usr/local/lib -lpcre2-8

-
-       Once  you  have  compiled and linked the demonstration program, you can
-       run simple tests like this:
+       Once you have built the demonstration program, you can run simple tests
+       like this:

         ./pcre2demo 'cat|dog' 'the cat sat on the mat'
         ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'

       Note  that  there  is  a  much  more comprehensive test program, called
       pcre2test, which supports many  more  facilities  for  testing  regular
-       expressions using the PCRE2 libraries. The pcre2demo  program  is  pro-
-       vided as a simple coding example.
+       expressions using all three PCRE2 libraries (8-bit, 16-bit, and 32-bit,
+       though not all three need be installed). The pcre2demo program is  pro-
+       vided as a relatively simple coding example.

       If you try to run pcre2demo when PCRE2 is not installed in the standard
       library directory, you may get an error like  this  on  some  operating
       systems (e.g. Solaris):

-         ld.so.1:  a.out:  fatal:  libpcre2.so.0: open failed: No such file or
-       directory
+         ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file
+       or directory

       This is caused by the way shared library support works  on  those  sys-
       tems. You need to add
@ -8785,8 +8796,8 @@ AUTHOR

 REVISION

-       Last updated: 20 October 2014
-       Copyright (c) 1997-2014 University of Cambridge.
+       Last updated: 02 February 2016
+       Copyright (c) 1997-2016 University of Cambridge.
 ------------------------------------------------------------------------------
 PCRE2SERIALIZE(3)          Library Functions Manual          PCRE2SERIALIZE(3)

--- a/doc/pcre2demo.3
+++ b/doc/pcre2demo.3
@ -20,28 +20,31 @@
 *************************************************/

 /* This is a demonstration program to illustrate a straightforward way of
-calling the PCRE2 regular expression library from a C program. See the
+using the PCRE2 regular expression library from a C program. See the
 pcre2sample documentation for a short discussion ("man pcre2sample" if you have
 the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
 incompatible with the original PCRE API.

 There are actually three libraries, each supporting a different code unit
-width. This demonstration program uses the 8-bit library.
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.

 In Unix-like environments, if PCRE2 is installed in your standard system
 libraries, you should be able to compile this program using this command:

-gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo

 If PCRE2 is not installed in a standard place, it is likely to be installed
 with support for the pkg-config mechanism. If you have pkg-config, you can
 compile this program using this command:

-gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo

-If you do not have pkg-config, you may have to use this:
+If you do not have pkg-config, you may have to use something like this:

-gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
  -R/usr/local/lib -lpcre2-8 -o pcre2demo

 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
@ -56,9 +59,14 @@ the following line. */

 /* #define PCRE2_STATIC */

-/* This macro must be defined before including pcre2.h. For a program that uses
-only one code unit width, it makes it possible to use generic function names
-such as pcre2_compile(). */
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */

 #define PCRE2_CODE_UNIT_WIDTH 8

@ -79,19 +87,19 @@ int main(int argc, char **argv)
 {
 pcre2_code *re;
 PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
-PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */
 PCRE2_SPTR name_table;

 int crlf_is_newline;
 int errornumber;
 int find_all;
 int i;
-int namecount;
-int name_entry_size;
 int rc;
 int utf8;

 uint32_t option_bits;
+uint32_t namecount;
+uint32_t name_entry_size;
 uint32_t newline;

 PCRE2_SIZE erroroffset;
@ -106,14 +114,18 @@ pcre2_match_data *match_data;
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
 * like Perl's /g option. We set the variable find_all to a non-zero value *
-* if the -g option is present. Apart from that, there must be exactly two *
-* arguments.                                                              *
+* if the -g option is present.                                            *
 **************************************************************************/

 find_all = 0;
 for (i = 1; i < argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+  else if (argv[i][0] == '-')
+    {
+    printf("Unrecognised option %s\en", argv[i]);
+    return 1;
+    }
  else break;
  }

@ -122,7 +134,7 @@ and the subject string. */

 if (argc - i != 2)
  {
-  printf("Two arguments required: a regex and a subject string\en");
+  printf("Exactly two arguments required: a regex and a subject string\en");
  return 1;
  }

@ -201,7 +213,7 @@ if (rc < 0)
 stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
-printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
+printf("Match succeeded at offset %d\en", (int)ovector[0]);


 /*************************************************************************
@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. */
  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  &namecount);          /* where to put the answer */

-if (namecount <= 0) printf("No named substrings\en"); else
+if (namecount == 0) printf("No named substrings\en"); else
  {
  PCRE2_SPTR tabptr;
  printf("Named substrings\en");
@ -371,7 +383,7 @@ for (;;)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one code unit */
-    if (crlf_is_newline &&                      /* If CRLF is newline & */
+    if (crlf_is_newline &&                      /* If CRLF is a newline & */
        start_offset < subject_length - 1 &&    /* we are at CRLF, */
        subject[start_offset] == '\er' &&
        subject[start_offset + 1] == '\en')
@ -417,7 +429,7 @@ for (;;)
    printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
    }

-  if (namecount <= 0) printf("No named substrings\en"); else
+  if (namecount == 0) printf("No named substrings\en"); else
    {
    PCRE2_SPTR tabptr = name_table;
    printf("Named substrings\en");
--- a/doc/pcre2sample.3
+++ b/doc/pcre2sample.3
@ -1,4 +1,4 @@
-.TH PCRE2SAMPLE 3 "20 October 2014" "PCRE2 10.00"
+.TH PCRE2SAMPLE 3 "02 February 2016" "PCRE2 10.22"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 SAMPLE PROGRAM"
@ -13,23 +13,28 @@ distribution. A listing of this program is given in the
 documentation. If you do not have a copy of the PCRE2 distribution, you can
 save this listing to re-create the contents of \fIpcre2demo.c\fP.
 .P
-The demonstration program, which uses the PCRE2 8-bit library, compiles the
-regular expression that is its first argument, and matches it against the
-subject string in its second argument. No PCRE2 options are set, and default
-character tables are used. If matching succeeds, the program outputs the
-portion of the subject that matched, together with the contents of any captured
-substrings.
+The demonstration program compiles the regular expression that is its
+first argument, and matches it against the subject string in its second
+argument. No PCRE2 options are set, and default character tables are used. If
+matching succeeds, the program outputs the portion of the subject that matched,
+together with the contents of any captured substrings.
 .P
 If the -g option is given on the command line, the program then goes on to
 check for further matches of the same regular expression in the same subject
 string. The logic is a little bit tricky because of the possibility of matching
 an empty string. Comments in the code explain what is going on.
 .P
+The code in \fBpcre2demo.c\fP is an 8-bit program that uses the PCRE2 8-bit
+library. It handles strings and characters that are stored in 8-bit code units.
+By default, one character corresponds to one code unit, but if the pattern 
+starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, 
+where characters may occupy multiple code units.
+.P
 If PCRE2 is installed in the standard include and library directories for your
 operating system, you should be able to compile the demonstration program using
-this command:
+a command like this:
 .sp
-  gcc -o pcre2demo pcre2demo.c -lpcre2-8
+  cc -o pcre2demo pcre2demo.c -lpcre2-8
 .sp
 If PCRE2 is installed elsewhere, you may need to add additional options to the
 command line. For example, on a Unix-like system that has PCRE2 installed in
@ -37,12 +42,11 @@ command line. For example, on a Unix-like system that has PCRE2 installed in
 like this:
 .sp
 .\" JOINSH
-  gcc -o pcre2demo -I/usr/local/include pcre2demo.c \e
+  cc -o pcre2demo -I/usr/local/include pcre2demo.c \e
     -L/usr/local/lib -lpcre2-8
 .sp
-.P
-Once you have compiled and linked the demonstration program, you can run simple
-tests like this:
+Once you have built the demonstration program, you can run simple tests like
+this:
 .sp
  ./pcre2demo 'cat|dog' 'the cat sat on the mat'
  ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'
@ -51,12 +55,13 @@ Note that there is a much more comprehensive test program, called
 .\" HREF
 \fBpcre2test\fP,
 .\"
-which supports many more facilities for testing regular expressions using the
-PCRE2 libraries. The
+which supports many more facilities for testing regular expressions using all
+three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be
+installed). The
 .\" HREF
 \fBpcre2demo\fP
 .\"
-program is provided as a simple coding example.
+program is provided as a relatively simple coding example.
 .P
 If you try to run
 .\" HREF
@ -65,7 +70,7 @@ If you try to run
 when PCRE2 is not installed in the standard library directory, you may get an
 error like this on some operating systems (e.g. Solaris):
 .sp
-  ld.so.1: a.out: fatal: libpcre2.so.0: open failed: No such file or directory
+  ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory
 .sp
 This is caused by the way shared library support works on those systems. You
 need to add
@ -89,6 +94,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 20 October 2014
-Copyright (c) 1997-2014 University of Cambridge.
+Last updated: 02 February 2016
+Copyright (c) 1997-2016 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -67,10 +67,10 @@ INPUT ENCODING

       For  maximum portability, therefore, it is safest to avoid non-printing
       characters in pcre2test input files. There is a facility for specifying
-       a pattern's characters as hexadecimal pairs, thus making it possible to
-       include binary zeroes in a pattern for testing purposes. Subject  lines
-       are processed for backslash escapes, which makes it possible to include
-       any data value.
+       some or all of a pattern's characters as hexadecimal pairs, thus making
+       it possible to include binary zeroes in a pattern for testing purposes.
+       Subject  lines are processed for backslash escapes, which makes it pos-
+       sible to include any data value.


 COMMAND LINE OPTIONS
@ -505,7 +505,7 @@ PATTERN MODIFIERS
             debug                     same as info,fullbincode
             fullbincode               show binary code with lengths
         /I  info                      show info about compiled pattern
-             hex                       pattern is coded in hexadecimal
+             hex                       unquoted characters are hexadecimal
             jit[=<number>]            use JIT
             jitfast                   use JIT fast path
             jitverify                 verify JIT use
@ -516,6 +516,7 @@ PATTERN MODIFIERS
             null_context              compile with a NULL context
             parens_nest_limit=<n>     set maximum parentheses depth
             posix                     use the POSIX API
+             posix_nosub               use the POSIX API with REG_NOSUB
             push                      push compiled pattern onto the stack
             stackguard=<number>       test the stackguard feature
             tables=[0|1|2]            select internal tables
@ -591,19 +592,30 @@ PATTERN MODIFIERS
       testing that pcre2_compile() behaves correctly in this  case  (it  uses
       default values).

-   Specifying a pattern in hex
+   Specifying pattern characters in hexadecimal

-       The hex modifier specifies that the characters of the pattern are to be
-       interpreted as pairs of hexadecimal digits. White  space  is  permitted
-       between pairs. For example:
+       The  hex  modifier specifies that the characters of the pattern, except
+       for substrings enclosed in single or double quotes, are  to  be  inter-
+       preted  as  pairs  of hexadecimal digits. This feature is provided as a
+       way of creating patterns that contain binary zeros and other non-print-
+       ing  characters.  White space is permitted between pairs of digits. For
+       example, this pattern contains three characters:

         /ab 32 59/hex

-       This  feature  is  provided  as a way of creating patterns that contain
-       binary zero and other non-printing characters.  By  default,  pcre2test
-       passes  patterns  as zero-terminated strings to pcre2_compile(), giving
-       the length as PCRE2_ZERO_TERMINATED. However, for patterns specified in
-       hexadecimal, the actual length of the pattern is passed.
+       Parts of such a pattern are taken literally  if  quoted.  This  pattern
+       contains  nine characters, only two of which are specified in hexadeci-
+       mal:
+
+         /ab "literal" 32/hex
+
+       Either single or double quotes may be used. There is no way of  includ-
+       ing the delimiter within a substring.
+
+       By  default,  pcre2test  passes  patterns as zero-terminated strings to
+       pcre2_compile(), giving the length as  PCRE2_ZERO_TERMINATED.  However,
+       for  patterns specified with the hex modifier, the actual length of the
+       pattern is passed.

   Generating long repetitive patterns

@ -732,16 +744,16 @@ PATTERN MODIFIERS

   Using the POSIX wrapper API

-       The  /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
-       per API rather than its  native  API.  This  supports  only  the  8-bit
-       library.   Note  that  it  does not imply POSIX matching semantics; for
-       more detail see the pcre2posix documentation. When  the  POSIX  API  is
-       being  used,  the  following pattern modifiers set options for the reg-
-       comp() function:
+       The posix and posix_nosub modifiers cause pcre2test to call  PCRE2  via
+       the  POSIX  wrapper API rather than its native API. When posix_nosub is
+       used, the POSIX option REG_NOSUB is  passed  to  regcomp().  The  POSIX
+       wrapper  supports  only  the 8-bit library. Note that it does not imply
+       POSIX matching semantics; for more detail see the pcre2posix documenta-
+       tion.  The  following  pattern  modifiers set options for the regcomp()
+       function:

         caseless           REG_ICASE
         multiline          REG_NEWLINE
-         no_auto_capture    REG_NOSUB
         dotall             REG_DOTALL     )
         ungreedy           REG_UNGREEDY   ) These options are not part of
         ucp                REG_UCP        )   the POSIX standard
@ -758,7 +770,8 @@ PATTERN MODIFIERS
       been set, a large buffer is used.

       The aftertext and allaftertext  subject  modifiers  work  as  described
-       below. All other modifiers cause an error.
+       below.  All other modifiers are either ignored, with a warning message,
+       or cause an error.

   Testing the stack guard feature

@ -855,7 +868,7 @@ SUBJECT MODIFIERS
       wrapper API to be used, the only option-setting modifiers that have any
       effect   are   notbol,   notempty,   and  noteol,  causing  REG_NOTBOL,
       REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to  regexec().
-       Any other modifiers cause an error.
+       The other modifiers are ignored, with a warning message.

   Setting match controls

@ -898,6 +911,9 @@ SUBJECT MODIFIERS
             zero_terminate             pass the subject as zero-terminated

       The effects of these modifiers are described in the following sections.
+       When  matching  via the POSIX wrapper API, the aftertext, allaftertext,
+       and ovector subject modifiers work as described below. All other  modi-
+       fiers are either ignored, with a warning message, or cause an error.

   Showing more text

@ -1472,9 +1488,9 @@ SAVING AND RESTORING COMPILED PATTERNS
       matched  with  the pattern, terminated as usual by an empty line or end
       of file. This command may be followed by  a  modifier  list  containing
       only  control  modifiers that act after a pattern has been compiled. In
-       particular, hex, posix, and push are not allowed, nor are  any  option-
-       setting  modifiers.   The JIT modifiers are, however permitted. Here is
-       an example that saves and reloads two patterns.
+       particular, hex, posix, posix_nosub, and push are not allowed, nor  are
+       any  option-setting modifiers.  The JIT modifiers are, however, permit-
+       ted. Here is an example that saves and reloads two patterns.

         /abc/push
         /xyz/push
@ -1505,5 +1521,5 @@ AUTHOR

 REVISION

-       Last updated: 12 December 2015
-       Copyright (c) 1997-2015 University of Cambridge.
+       Last updated: 31 January 2016
+       Copyright (c) 1997-2016 University of Cambridge.
--- a/src/pcre2demo.c
+++ b/src/pcre2demo.c
@ -3,28 +3,31 @@
 *************************************************/

 /* This is a demonstration program to illustrate a straightforward way of
-calling the PCRE2 regular expression library from a C program. See the
+using the PCRE2 regular expression library from a C program. See the
 pcre2sample documentation for a short discussion ("man pcre2sample" if you have
 the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
 incompatible with the original PCRE API.

 There are actually three libraries, each supporting a different code unit
-width. This demonstration program uses the 8-bit library.
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.

 In Unix-like environments, if PCRE2 is installed in your standard system
 libraries, you should be able to compile this program using this command:

-gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo

 If PCRE2 is not installed in a standard place, it is likely to be installed
 with support for the pkg-config mechanism. If you have pkg-config, you can
 compile this program using this command:

-gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo

-If you do not have pkg-config, you may have to use this:
+If you do not have pkg-config, you may have to use something like this:

-gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
  -R/usr/local/lib -lpcre2-8 -o pcre2demo

 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
@ -39,9 +42,14 @@ the following line. */

 /* #define PCRE2_STATIC */

-/* This macro must be defined before including pcre2.h. For a program that uses
-only one code unit width, it makes it possible to use generic function names
-such as pcre2_compile(). */
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */

 #define PCRE2_CODE_UNIT_WIDTH 8

@ -62,19 +70,19 @@ int main(int argc, char **argv)
 {
 pcre2_code *re;
 PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
-PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */
 PCRE2_SPTR name_table;

 int crlf_is_newline;
 int errornumber;
 int find_all;
 int i;
-int namecount;
-int name_entry_size;
 int rc;
 int utf8;

 uint32_t option_bits;
+uint32_t namecount;
+uint32_t name_entry_size;
 uint32_t newline;

 PCRE2_SIZE erroroffset;
@ -89,14 +97,18 @@ pcre2_match_data *match_data;
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
 * like Perl's /g option. We set the variable find_all to a non-zero value *
-* if the -g option is present. Apart from that, there must be exactly two *
-* arguments.                                                              *
+* if the -g option is present.                                            *
 **************************************************************************/

 find_all = 0;
 for (i = 1; i < argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+  else if (argv[i][0] == '-')
+    {
+    printf("Unrecognised option %s\n", argv[i]);
+    return 1;
+    }
  else break;
  }

@ -105,7 +117,7 @@ and the subject string. */

 if (argc - i != 2)
  {
-  printf("Two arguments required: a regex and a subject string\n");
+  printf("Exactly two arguments required: a regex and a subject string\n");
  return 1;
  }

@ -184,7 +196,7 @@ if (rc < 0)
 stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
-printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
+printf("Match succeeded at offset %d\n", (int)ovector[0]);


 /*************************************************************************
@ -225,7 +237,7 @@ we have to extract the count of named parentheses from the pattern. */
  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  &namecount);          /* where to put the answer */

-if (namecount <= 0) printf("No named substrings\n"); else
+if (namecount == 0) printf("No named substrings\n"); else
  {
  PCRE2_SPTR tabptr;
  printf("Named substrings\n");
@ -354,7 +366,7 @@ for (;;)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one code unit */
-    if (crlf_is_newline &&                      /* If CRLF is newline & */
+    if (crlf_is_newline &&                      /* If CRLF is a newline & */
        start_offset < subject_length - 1 &&    /* we are at CRLF, */
        subject[start_offset] == '\r' &&
        subject[start_offset + 1] == '\n')
@ -400,7 +412,7 @@ for (;;)
    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
    }

-  if (namecount <= 0) printf("No named substrings\n"); else
+  if (namecount == 0) printf("No named substrings\n"); else
    {
    PCRE2_SPTR tabptr = name_table;
    printf("Named substrings\n");