Documentation scripts

2014-09-23 11:35:51 +00:00 · 2014-09-23 11:35:51 +00:00 · d5495a30f4
parent a625f0ea01
commit d5495a30f4
19 changed files with 10412 additions and 6 deletions
--- a/313
+++ b/313
@ -0,0 +1,313 @@
 #! /usr/bin/perl -w
 # Script to turn PCRE2 man pages into HTML
 # Subroutine to handle font changes and other escapes
 sub do_line {
 # Convert one line of man-page text to HTML and return the result. Angle
 # brackets become numeric entities, \fI...\fR and \fB...\fR font changes
 # (\fP is accepted as the closer too) become <i> and <b> elements, the troff
 # escape \e becomes a backslash, and "(c)" that follows "Copyright " becomes
 # the copyright entity.
 my $text = shift;
 for ($text)
   {
   s{<}{&#60;}g;          # Entities first, so inserted tags are not touched
   s{>}{&#62;}g;
   s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
   s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;
   s{\\e}{\\}g;
   s{(?<=Copyright )\(c\)}{&copy;}g;
   }
 return $text;
 }
 # Subroutine to ensure not in a paragraph
 sub end_para {
 # If a paragraph is open, close it on TEMP (closing an open literal section
 # first). In all cases clear the paragraph, literal and text-written flags.
 if ($inpara)
   {
   if ($inpre) { print TEMP "</PRE>\n"; }
   print TEMP "</P>\n";
   }
 $inpre = 0;
 $inpara = 0;
 $wrotetext = 0;
 }
 # Subroutine to start a new paragraph
 sub new_para {
 # Finish whatever paragraph is currently open, then write <P> to TEMP and
 # record that a paragraph is now open.
 end_para();
 print TEMP "<P>\n";
 $inpara = 1;
 }
 # Main program
 # State flags: inside .nf/.fi, inside a paragraph, inside a literal section,
 # and whether any text has been written since the last paragraph end.
 ($innf, $inpara, $inpre, $wrotetext) = (0, 0, 0, 0);
 $toc = 0;      # Set to 1 by the -toc option
 $ref = 1;      # Number used for the next TOC/SEC anchor pair
 # Consume leading options; anything starting with "-" other than -toc is
 # accepted and ignored.
 while (@ARGV && $ARGV[0] =~ /^-/)
   {
   my $option = shift;
   $toc = 1 if $option eq "-toc";
   }
 # Initial output to STDOUT: the fixed start of the page. The page name is
 # taken from the first argument that remains after option processing.
 print <<End ;
 <html>
 <head>
 <title>$ARGV[0] specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>$ARGV[0] man page</h1>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 <p>
 This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
 End
 # Start the table of contents list, if one was requested
 print "<ul>\n" if ($toc);
 # Open the temporary output file. The three-argument form of open keeps the
 # mode separate from the file name, and the system error is reported if the
 # open fails.
 open(TEMP, ">", "/tmp/$$") || die "Can't open /tmp/$$ for output: $!\n";
 # Main loop: read the man page from STDIN one line at a time. Lines that
 # start with a dot are troff requests or comments and are dispatched below;
 # all other lines are text. Body output goes to TEMP; table of contents
 # entries (with -toc) go directly to STDOUT.
 while (<STDIN>)
   {
   # Handle lines beginning with a dot
   if (/^\./)
     {
     # Some of the PCRE2 man pages used to contain instances of .br. However,
     # they should have all been removed because they cause trouble in some
     # (other) automated systems that translate man pages to HTML. Complain if
     # we find .br or .in (another macro that is deprecated).
     if (/^\.br/ || /^\.in/)
       {
       print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
       print STDERR "*** $_\n";
       die "*** Processing abandoned\n";
       }
     # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
     elsif (/^\.nf/)
       {
       $innf = 1;
       }
     elsif (/^\.fi/)
       {
       $innf = 0;
       }
     # Handling .sp is subtle. If it is inside a literal section, do nothing if
     # the next line is a non literal text line; similarly, if not inside a
     # literal section, do nothing if a literal follows, unless we are inside
     # a .nf/.ne section. The point being that the <pre> and </pre> that delimit
     # literal sections will do the spacing. Always skip if no previous output.
     elsif (/^\.sp/)
       {
       if ($wrotetext)
         {
         $_ = <STDIN>;
         if ($inpre)
           {
           print TEMP "\n" if (/^[\s.]/);
           }
         else
           {
           print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
           }
         redo;    # Now process the lookahead line we just read
         }
       }
     elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
       {
       &new_para();
       }
     elsif (/^\.SH\s*("?)(.*)\1/)
       {
       # Ignore the NAME section
       if ($2 =~ /^NAME\b/)
         {
         <STDIN>;
         next;
         }
       &end_para();
       my($title) = &do_line($2);
       if ($toc)
         {
         # The title is passed as a %s argument rather than being interpolated
         # into the format, so that a "%" in a section title cannot be taken
         # as a conversion. Each format is given exactly the arguments it
         # uses, which avoids a "redundant argument" warning under -w.
         printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
           $ref, $ref, $title);
         printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
           $ref, $title);
         $ref++;
         }
       else
         {
         print TEMP "<br><b>\n$title\n</b><br>\n";
         }
       }
     elsif (/^\.SS\s*("?)(.*)\1/)
       {
       &end_para();
       my($title) = &do_line($2);
       print TEMP "<br><b>\n$title\n</b><br>\n";
       }
     elsif (/^\.B\s*(.*)/)
       {
       &new_para() if (!$inpara);
       $_ = &do_line($1);
       s/"(.*?)"/$1/g;
       print TEMP "<b>$_</b>\n";
       $wrotetext = 1;
       }
     elsif (/^\.I\s*(.*)/)
       {
       &new_para() if (!$inpara);
       $_ = &do_line($1);
       s/"(.*?)"/$1/g;
       print TEMP "<i>$_</i>\n";
       $wrotetext = 1;
       }
     # A comment that starts "HREF" takes the next line as a name that
     # is turned into a hyperlink, using the text given, which might be
     # in a special font. If it ends in () or (digits) or punctuation, they
     # aren't part of the link.
     elsif (/^\.\\"\s*HREF/)
       {
       $_=<STDIN>;
       chomp;
       $_ = &do_line($_);
       $_ =~ s/\s+$//;
       $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
       print TEMP "<a href=\"$1.html\">$_</a>\n";
       }
     # A comment that starts "HTML" inserts literal HTML
     elsif (/^\.\\"\s*HTML\s*(.*)/)
       {
       print TEMP $1;
       }
     # A comment that starts < inserts that HTML at the end of the
     # *next* input line - so as not to get a newline between them.
     elsif (/^\.\\"\s*(<.*>)/)
       {
       my($markup) = $1;
       $_=<STDIN>;
       chomp;
       $_ = &do_line($_);
       $_ =~ s/\s+$//;
       print TEMP "$_$markup\n";
       }
     # A comment that starts JOIN joins the next two lines together, with one
     # space between them. Then that line is processed. This is used in some
     # displays where two lines are needed for the "man" version. JOINSH works
     # the same, except that it assumes this is a shell command, so removes
     # continuation backslashes.
     elsif (/^\.\\"\s*JOIN(SH)?/)
       {
       my($one,$two);
       $one = <STDIN>;
       $two = <STDIN>;
       $one =~ s/\s*\\e\s*$// if (defined($1));
       chomp($one);
       $two =~ s/^\s+//;
       $_ = "$one $two";
       redo;            # Process the joined lines
       }
     # .EX/.EE are used in the pcredemo page to bracket the entire program,
     # which is unmodified except for turning backslash into "\e".
     elsif (/^\.EX\s*$/)
       {
       print TEMP "<PRE>\n";
       while (<STDIN>)
         {
         last if /^\.EE\s*$/;
         s/\\e/\\/g;
         s/&/&amp;/g;
         s/</&lt;/g;
         s/>/&gt;/g;
         print TEMP;
         }
       }
     # Ignore anything not recognized
     next;
     }
   # Line does not begin with a dot. Replace blank lines with new paragraphs
   if (/^\s*$/)
     {
     &end_para() if ($wrotetext);
     next;
     }
   # Convert fonts changes and output an ordinary line. Ensure that indented
   # lines are marked as literal.
   $_ = &do_line($_);
   &new_para() if (!$inpara);
   if (/^\s/)
     {
     if (!$inpre)
       {
       print TEMP "<pre>\n";
       $inpre = 1;
       }
     }
   elsif ($inpre)
     {
     print TEMP "</pre>\n";
     $inpre = 0;
     }
   # Add <br> to the end of a non-literal line if we are within .nf/.fi
   $_ .= "<br>\n" if (!$inpre && $innf);
   print TEMP;
   $wrotetext = 1;
   }
 # Close the table of contents list if one was started
 if ($toc) { print "</ul>\n"; }
 # Re-read the temporary file and append its contents to the standard output,
 # followed by the fixed end of the page
 close(TEMP);
 open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
 while (defined(my $body_line = <TEMP>)) { print $body_line; }
 print <<End ;
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 End
 # The temporary file is no longer needed
 close(TEMP);
 unlink("/tmp/$$");
 # End
--- a/67
+++ b/67
@ -0,0 +1,67 @@
 #! /usr/bin/perl
 # A script to scan PCRE2's man pages to check for typos in the control
 # sequences. I use only a small set of the available repertoire, so it is 
 # straightforward to check that nothing else has slipped in by mistake. This
 # script should be called in the doc directory.
 # Check each file named on the command line. A message is written to STDOUT
 # for every empty line, every control line that is not one of the permitted
 # forms, and every text line containing a backslash escape other than \e,
 # \fI, \fB or \fP. The exit status is 1 if anything was reported, else 0.
 $yield = 0;
 while (scalar(@ARGV) > 0)
   {
   $line = 0;
   $file = shift @ARGV;
   # Three-argument open with a lexical handle, so that characters in the
   # file name can never be taken as an open mode or a pipe.
   open(my $in, "<", $file) || die "Failed to open $file\n";
   while (<$in>)
     {
     $line++;
     # Messages are written with print, not printf: they contain the file
     # name, which must not be interpreted as a format string.
     if (/^\s*$/)
       {
       print "Empty line $line of $file\n";
       $yield = 1;
       }
     elsif (/^\./)
       {
       if (!/^\.\s*$|
             ^\.B\s+\S|
             ^\.TH\s\S|
             ^\.SH\s\S|
             ^\.SS\s\S|
             ^\.TP(?:\s?\d+)?\s*$|
             ^\.SM\s*$|
             ^\.br\s*$|
             ^\.rs\s*$|
             ^\.sp\s*$|
             ^\.nf\s*$|
             ^\.fi\s*$|
             ^\.P\s*$|
             ^\.PP\s*$|
             ^\.\\"(?:\ HREF)?\s*$|
             ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
             ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
             ^\.\\"\s<\/a>\s*$|
             ^\.\\"\sJOINSH\s*$|
             ^\.\\"\sJOIN\s*$/x
          )
         {
         print "Bad control line $line of $file\n";
         $yield = 1;
         }
       }
     else
       {
       if (/\\[^ef]|\\f[^IBP]/)
         {
         print "Bad backslash in line $line of $file\n";
         $yield = 1;
         }
       }
     }
   close($in);
   }
 exit $yield;
 # End  
--- a/113
+++ b/113
@ -0,0 +1,113 @@
 #! /usr/bin/perl -w
 # Script to take the output of nroff -man and remove all the backspacing and
 # the page footers and the screen commands etc so that it is more usefully
 # readable online. In fact, in the latest nroff, intermediate footers don't
 # seem to be generated any more.
 # State carried from line to line:
 #   $blankcount   blank lines read since the last output that have not yet
 #                 been written
 #   $lastwascut   set when the previous action was the removal of a chunk
 #   $firstheader  true until the first header line has been printed
 #   $lastprinted  a copy of a previously printed line (undefined at first)
 $blankcount = 0;
 $lastwascut = 0;
 $firstheader = 1;
 # Input on STDIN; output to STDOUT.
 while (<STDIN>)
   {
   s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
   s/.\x8//g;         # Remove "char, backspace"
   # Handle header lines. Retain only the first one we encounter, but remove
   # the blank line that follows. Any others (e.g. at end of document) and the
   # following blank line are dropped.
   if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
     {
     if ($firstheader)
       {
       $firstheader = 0;
       print;
       $lastprinted = $_;
       $lastwascut = 0;
       }
     $_=<STDIN>;       # Remove a blank that follows
     next;
     }
   # Count runs of empty lines
   if (/^\s*$/)
     {
     $blankcount++;
     $lastwascut = 0;
     next;
     }
   # If a chunk of lines has been cut out (page footer) and the next line
   # has a different indentation, put back one blank line.
   if ($lastwascut && $blankcount < 1 && defined($lastprinted))
     {
     # The leading white space of the two lines is compared as strings
     ($a) = $lastprinted =~ /^(\s*)/;
     ($b) = $_ =~ /^(\s*)/;
     $blankcount++ if ($a ne $b);
     }
   # We get here only when we have a non-blank line in hand. If it was preceded
   # by 3 or more blank lines, read the next 3 lines and see if they are blank.
   # If so, remove all 7 lines, and remember that we have just done a cut.
   if ($blankcount >= 3)
     {
     for ($i = 0; $i < 3; $i++)
       {
       $next[$i] = <STDIN>;
       $next[$i] = "" if !defined $next[$i];   # End of input: treat as empty
       $next[$i] =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
       $next[$i] =~ s/.\x8//g;         # Remove "char, backspace"
       }
     # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
     if ($next[0] =~ /^\s*$/ &&
         $next[1] =~ /^\s*$/ &&
         $next[2] =~ /^\s*$/)
       {
       # Only three blanks are removed from the count; any excess is kept
       $blankcount -= 3;
       $lastwascut = 1;
       }
     # Otherwise output the saved blanks, the current, and the next three
     # lines. Remember the last printed line.
     else
       {
       for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
       print;
       for ($i = 0; $i < 3; $i++)
         {
         # NOTE(review): backspaces were already removed from these lines
         # when they were read above, so this looks redundant - confirm.
         $next[$i] =~ s/.\x8//g;
         print $next[$i];
         # NOTE(review): this records the current line ($_), not the
         # lookahead line that was just printed, although the comment above
         # says "the last printed line" - confirm which is intended.
         $lastprinted = $_;
         }
       $lastwascut = 0;
       $blankcount = 0;
       }
     }
   # This non-blank line is not preceded by 3 or more blank lines. Output
   # any blanks there are, and the line. Remember it. Force two blank lines
   # before headings.
   else
     {
     # A heading here is any line that starts with a non-space character,
     # other than the "Last updated" and "Copyright" lines, once something
     # has been printed.
     $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
       defined($lastprinted);
     for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
     print;
     $lastprinted = $_;
     $lastwascut = 0;
     $blankcount = 0;
     }
   }
 # End
--- a/35
+++ b/35
@ -0,0 +1,35 @@
 #!/usr/bin/perl
 # This is a script for removing trailing whitespace from lines in files that
 # are listed on the command line.
 # This subroutine does the work for one file.
 sub detrail {
 # Remove trailing white space from every line of one file. The file is
 # rewritten in place, and only if at least one line was changed.
 #   $_[0]   name of the file to process
 # Dies if the file cannot be opened for reading, or cannot be opened for
 # writing and completely rewritten.
 my($file) = $_[0];
 my($changed) = 0;
 # Three-argument opens with lexical handles: the mode can never be affected
 # by the characters in the file name.
 open(my $in, "<", $file) || die "Can't open $file for input: $!";
 my(@lines) = <$in>;
 close($in);
 foreach (@lines)
   {
   $changed = 1 if s/\s+\n$/\n/;
   }
 if ($changed)
   {
   open(my $out, ">", $file) || die "Can't open $file for output: $!";
   print {$out} @lines;
   # Buffered write errors are only reported when the handle is closed
   close($out) || die "Can't finish writing $file: $!";
   }
 }
 # Main program: process each file named on the command line, in order
 $, = "";   # Nothing is to be inserted between the lines when they are printed
 foreach my $name (@ARGV) { detrail($name); }
 # End
--- a/265
+++ b/265
@ -0,0 +1,265 @@
 #!/bin/sh
 # Script to prepare the files for building a PCRE2 release. It does some
 # processing of the documentation, detrails files, and creates pcre2.h.generic
 # and config.h.generic (for use by builders who can't run ./configure).
 # You must run this script before running "make dist". If its first argument
 # is "doc", it stops after preparing the documentation. There are no other
 # arguments. The script makes use of the following files:
 # 132html     A Perl script that converts a .1 or .3 man page into HTML. It
 #             "knows" the relevant troff constructs that are used in the PCRE2
 #             man pages.
 # CheckMan    A Perl script that checks man pages for typos in the mark up.
 # CleanTxt    A Perl script that cleans up the output of "nroff -man" by
 #             removing backspaces and other redundant text so as to produce
 #             a readable .txt file.
 # Detrail     A Perl script that removes trailing spaces from files.
 # doc/index.html.src
 #             A file that is copied as index.html into the doc/html directory
 #             when the HTML documentation is built. It works like this so that
 #             doc/html can be deleted and re-created from scratch.
 # README & NON-AUTOTOOLS-BUILD
 #             These files are copied into the doc/html directory, with .txt
 #             extensions so that they can be hyperlinked from the HTML
 #             documentation, because some people just go to the HTML without
 #             looking for text files.
 # First, sort out the documentation. Remove pcre2demo.3 first because it won't
 # pass the markup check (it is created below, using markup that none of the
 # other pages use).
 # Give up at once if the doc directory cannot be entered; otherwise the
 # commands that follow would remove and process files in the wrong directory.
 cd doc || exit 1
 echo Processing documentation
 /bin/rm -f pcre2demo.3
 # Check the remaining man pages
 perl ../CheckMan *.1 *.3
 if [ $? != 0 ] ; then exit 1; fi
 # Build pcre2.txt, the plain text form of the library documentation: a fixed
 # preamble followed by the cleaned-up nroff output of each listed .3 page,
 # with a rule after each page. The individual function pages are omitted.
 cat >pcre2.txt <<End
 -----------------------------------------------------------------------------
 This file contains a concatenation of the PCRE2 man pages, converted to plain
 text format for ease of searching with a text editor, or for use on systems
 that do not have a man page processor. The small individual files that give
 synopses of each function in the library have not been included. Neither has
 the pcre2demo program. There are separate text files for the pcre2grep and
 pcre2test commands.
 -----------------------------------------------------------------------------
 End
 echo "Making pcre2.txt"
 for file in pcre2api pcre2callout pcre2unicode ; do
 #for file in pcre pcre16 pcre32 pcrebuild pcrematching \
 #            pcrecompat pcrepattern pcresyntax pcrejit pcrepartial \
 #            pcreprecompile pcreperform pcreposix pcrecpp pcresample \
 #            pcrelimits pcrestack ; do
   echo "  Processing $file.3"
   raw="$file.rawtxt"
   nroff -c -man "$file.3" >"$raw"
   perl ../CleanTxt <"$raw" >>pcre2.txt
   /bin/rm "$raw"
   echo "------------------------------------------------------------------------------" >>pcre2.txt
   # Two lines holding a single space separate the pages, except after
   # pcre2sample
   if [ "$file" != "pcre2sample" ] ; then
     printf ' \n \n' >>pcre2.txt
   fi
 done
 # The three commands: each .1 page gets a text file of its own
 for file in pcre2test ; do
 # for file in pcre2test pcre2grep pcre-config ; do
   echo "Making $file.txt"
   raw="$file.rawtxt"
   nroff -c -man "$file.1" >"$raw"
   perl ../CleanTxt <"$raw" >"$file.txt"
   /bin/rm "$raw"
 done
 # Make pcre2demo.3 from the pcre2demo.c source file
 # The here-document is quoted ("END"), so the shell passes the Perl program
 # through unchanged. The program writes definitions of the .EX and .EE
 # macros, then the C source with every backslash turned into \e, bracketed
 # by .EX and .EE. Note that the program opens pcre2demo.3 itself, in
 # addition to the shell redirecting its standard output to the same file.
 echo "Making pcre2demo.3"
 perl <<"END" >pcre2demo.3
   open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
   open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
   print OUT ".\\\" Start example.\n" .
             ".de EX\n" .
             ".  nr mE \\\\n(.f\n" .
             ".  nf\n" .
             ".  nh\n" .
             ".  ft CW\n" .
             "..\n" .
             ".\n" .
             ".\n" .
             ".\\\" End example.\n" .
             ".de EE\n" .
             ".  ft \\\\n(mE\n" .
             ".  fi\n" .
             ".  hy \\\\n(HY\n" .
             "..\n" .
             ".\n" .
             ".EX\n" ;
   while (<IN>)
     {
     s/\\/\\e/g;
     print OUT;
     }
   print OUT ".EE\n";
   close(IN);
   close(OUT);
 END
 # Stop if the Perl program failed (for example, if the source is missing)
 if [ $? != 0 ] ; then exit 1; fi
 # Make HTML form of the documentation. The html directory is emptied and
 # then refilled from index.html.src, README and the man pages.
 echo "Making HTML documentation"
 /bin/rm html/*
 cp index.html.src html/index.html
 cp ../README html/README.txt
 # cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
 # The command pages always get a table of contents. A failed conversion
 # stops the script, in the same way as for the library pages below.
 for file in *.1 ; do
   base=`basename "$file" .1`
   echo "  Making $base.html"
   perl ../132html -toc "$base" <"$file" >"html/$base.html"
   if [ $? != 0 ] ; then exit 1; fi
 done
 # Exclude table of contents for function summaries. It seems that expr
 # forces an anchored regex. Also exclude them for small pages that have
 # only one section.
 for file in *.3 ; do
   base=`basename "$file" .3`
   toc=-toc
   if [ `expr "$base" : '.*_'` -ne 0 ] ; then toc="" ; fi
   if [ "$base" = "pcre2sample" ]  || \
      [ "$base" = "pcre2stack" ]   || \
      [ "$base" = "pcre2compat" ]  || \
      [ "$base" = "pcre2limits" ]  || \
      [ "$base" = "pcre2perform" ] || \
      [ "$base" = "pcre2unicode" ] ; then
     toc=""
   fi
   echo "  Making $base.html"
   # $toc is deliberately unquoted: when empty it must vanish, not be passed
   # as an empty argument.
   perl ../132html $toc "$base" <"$file" >"html/$base.html"
   if [ $? != 0 ] ; then exit 1; fi
 done
 # The documentation is complete. Return to the directory the script was
 # started in, and finish here if only the documentation was asked for.
 cd ..
 echo "Documentation done"
 if test "$1" = "doc" ; then exit; fi
 # FIXME pro tem only do docs
 exit
 # These files are detrailed; do not detrail the test data because there may be
 # significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
 # line endings and the detrail script removes all trailing white space. The
 # configure files are also omitted from the detrailing. We don't bother with
 # those pcre[16|32]_xx files that just define COMPILE_PCRE16 and then #include the
 # common file, because they aren't going to change.
 # NOTE(review): the unconditional "exit" that precedes this section means it
 # is not reached at present. The names listed are pcre* rather than pcre2*
 # names - presumably still to be updated; confirm before enabling.
 files="\
  Makefile.am \
  Makefile.in \
  configure.ac \
  README \
  LICENCE \
  COPYING \
  AUTHORS \
  NEWS \
  NON-UNIX-USE \
  NON-AUTOTOOLS-BUILD \
  INSTALL \
  132html \
  CleanTxt \
  Detrail \
  ChangeLog \
  CMakeLists.txt \
  RunGrepTest \
  RunTest \
  pcre-config.in \
  libpcre.pc.in \
  libpcre16.pc.in \
  libpcre32.pc.in \
  libpcreposix.pc.in \
  libpcrecpp.pc.in \
  config.h.in \
  pcre_chartables.c.dist \
  pcredemo.c \
  pcregrep.c \
  pcretest.c \
  dftables.c \
  pcreposix.c \
  pcreposix.h \
  pcre.h.in \
  pcre_internal.h \
  pcre_byte_order.c \
  pcre_compile.c \
  pcre_config.c \
  pcre_dfa_exec.c \
  pcre_exec.c \
  pcre_fullinfo.c \
  pcre_get.c \
  pcre_globals.c \
  pcre_jit_compile.c \
  pcre_jit_test.c \
  pcre_maketables.c \
  pcre_newline.c \
  pcre_ord2utf8.c \
  pcre16_ord2utf16.c \
  pcre32_ord2utf32.c \
  pcre_printint.c \
  pcre_refcount.c \
  pcre_string_utils.c \
  pcre_study.c \
  pcre_tables.c \
  pcre_valid_utf8.c \
  pcre_version.c \
  pcre_xclass.c \
  pcre16_utf16_utils.c \
  pcre32_utf32_utils.c \
  pcre16_valid_utf16.c \
  pcre32_valid_utf32.c \
  pcre_scanner.cc \
  pcre_scanner.h \
  pcre_scanner_unittest.cc \
  pcrecpp.cc \
  pcrecpp.h \
  pcrecpparg.h.in \
  pcrecpp_unittest.cc \
  pcre_stringpiece.cc \
  pcre_stringpiece.h.in \
  pcre_stringpiece_unittest.cc \
  perltest.pl \
  ucp.h \
  makevp.bat \
  pcre.def \
  libpcre.def \
  libpcreposix.def"
 echo Detrailing
 # $files is left unquoted so that it is split into one argument per name
 perl ./Detrail $files doc/p* doc/html/*
 echo Done
 #End
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -0,0 +1 @@
 This is a placeholder README file for a work in progress.
--- a/doc/html/index.html
+++ b/doc/html/index.html
@ -0,0 +1,177 @@
 <html>
 <!-- This is a manually maintained file that is the root of the HTML version of 
     the PCRE2 documentation. When the HTML documents are built from the man 
     page versions, the entire doc/html directory is emptied, this file is then 
     copied into doc/html/index.html, and the remaining files therein are 
     created by the 132html script.
 -->      
 <head>
 <title>PCRE2 specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
 <p>
 The HTML documentation for PCRE2 consists of a number of pages that are listed
 below in alphabetical order. If you are new to PCRE2, please read the first one
 first.
 </p>
 <table>
 <tr><td><a href="pcre2.html">pcre2</a></td>
    <td>&nbsp;&nbsp;Introductory page</td></tr>
 <tr><td><a href="pcre2-config.html">pcre2-config</a></td>
    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
 <tr><td><a href="pcre2api.html">pcre2api</a></td>
    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
 <tr><td><a href="pcre2build.html">pcre2build</a></td>
    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
 <tr><td><a href="pcre2callout.html">pcre2callout</a></td>
    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
 <tr><td><a href="pcre2compat.html">pcre2compat</a></td>
    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
 <tr><td><a href="pcre2demo.html">pcre2demo</a></td>
    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
 <tr><td><a href="pcre2grep.html">pcre2grep</a></td>
    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
 <tr><td><a href="pcre2jit.html">pcre2jit</a></td>
    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
 <tr><td><a href="pcre2limits.html">pcre2limits</a></td>
    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
 <tr><td><a href="pcre2matching.html">pcre2matching</a></td>
    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
 <tr><td><a href="pcre2partial.html">pcre2partial</a></td>
    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
 <tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
 <tr><td><a href="pcre2perform.html">pcre2perform</a></td>
    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
 <tr><td><a href="pcre2posix.html">pcre2posix</a></td>
    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
 <tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
 <tr><td><a href="pcre2sample.html">pcre2sample</a></td>
    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
 <tr><td><a href="pcre2stack.html">pcre2stack</a></td>
    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
 <tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
 <tr><td><a href="pcre2test.html">pcre2test</a></td>
    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
 <tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
 </table>
 <p>
 There are also individual pages that summarize the interface for each function
 in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
 functions.
 </p>
 <table>    
 <tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
 <tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
 <tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
 <tr><td><a href="pcre2_config.html">pcre2_config</a></td>
    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
 <tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
 <tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
 <tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
 <tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (Perl compatible)</td></tr>
 <tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
    <td>&nbsp;&nbsp;Free study data</td></tr>
 <tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
 <tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
 <tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
 <tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
 <tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
 <tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
 <tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
 <tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
 <tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
 <tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
 <tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
 <tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
 <tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
 <tr><td><a href="pcre2_study.html">pcre2_study</a></td>
    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
 <tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_version.html">pcre2_version</a></td>
    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
 </table>
 </body>
 </html>
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
--- a/doc/html/pcre2callout.html
+++ b/doc/html/pcre2callout.html
@ -0,0 +1,270 @@
 <html>
 <head>
 <title>pcre2callout specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>pcre2callout man page</h1>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 <p>
 This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
 <ul>
 <li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
 <li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 <li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
 <li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
 <li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
 <li><a name="TOC6" href="#SEC6">AUTHOR</a>
 <li><a name="TOC7" href="#SEC7">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 <P>
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
 <b>int (*pcre2_callout)(pcre2_callout_block *);</b>
 </P>
 <br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 <P>
 PCRE2 provides a feature called "callout", which is a means of temporarily
 passing control to the caller of PCRE2 in the middle of pattern matching. The
 caller of PCRE2 provides an external function by putting its entry point in
 a match context (see <b>pcre2_set_callout()</b> in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation).
 </P>
 <P>
 Within a regular expression, (?C) indicates the points at which the external
 function is to be called. Different callout points can be identified by putting
 a number less than 256 after the letter C. The default value is zero.
 For example, this pattern has two callout points:
 <pre>
  (?C1)abc(?C2)def
 </pre>
 If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
 automatically inserts callouts, all with number 255, before each item in the
 pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
 <pre>
  A(\d{2}|--)
 </pre>
 it is processed as if it were
 <br>
 <br>
 (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
 <br>
 <br>
 Notice that there is a callout before and after each parenthesis and
 alternation bar. If the pattern contains a conditional group whose condition is
 an assertion, an automatic callout is inserted immediately before the
 condition. Such a callout may also be inserted explicitly, for example:
 <pre>
  (?(?C9)(?=a)ab|de)
 </pre>
 This applies only to assertion conditions (because they are themselves
 independent groups).
 </P>
 <P>
 Automatic callouts can be used for tracking the progress of pattern matching.
 The
 <a href="pcre2test.html"><b>pcre2test</b></a>
 program has a pattern qualifier (/auto_callout) that sets automatic callouts;
 when it is used, the output indicates how the pattern is being matched. This is
 useful information when you are trying to optimize the performance of a
 particular pattern.
 </P>
 <br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
 <P>
 You should be aware that, because of optimizations in the way PCRE2 compiles
 and matches patterns, callouts sometimes do not happen exactly as you might
 expect.
 </P>
 <P>
 At compile time, PCRE2 "auto-possessifies" repeated items when it knows that
 what follows cannot be part of the repeat. For example, a+[bc] is compiled as
 if it were a++[bc]. The <b>pcre2test</b> output when this pattern is anchored
 and then applied with automatic callouts to the string "aaaa" is:
 <pre>
  ---&#62;aaaa
   +0 ^        ^
   +1 ^        a+
   +3 ^   ^    [bc]
  No match
 </pre>
 This indicates that when matching [bc] fails, there is no backtracking into a+
 and therefore the callouts that would be taken for the backtracks do not occur.
 You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS
 to <b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
 this is done in <b>pcre2test</b> (using the /no_auto_possess qualifier), the
 output changes to this:
 <pre>
  ---&#62;aaaa
   +0 ^        ^
   +1 ^        a+
   +3 ^   ^    [bc]
   +3 ^  ^     [bc]
   +3 ^ ^      [bc]
   +3 ^^       [bc]
  No match
 </pre>
 This time, when matching [bc] fails, the matcher backtracks into a+ and tries
 again, repeatedly, until a+ itself fails.
 </P>
 <P>
 Other optimizations that provide fast "no match" results also affect callouts.
 For example, if the pattern is
 <pre>
  ab(?C4)cd
 </pre>
 PCRE2 knows that any matching string must contain the letter "d". If the
 subject string is "abyz", the lack of "d" means that matching doesn't ever
 start, and the callout is never reached. However, with "abyd", though the
 result is still no match, the callout is obeyed.
 </P>
 <P>
 PCRE2 also knows the minimum length of a matching string, and will immediately
 give a "no match" return without actually running a match if the subject is not
 long enough, or, for unanchored patterns, if it has been scanned far enough.
 </P>
 <P>
 You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
 option to the matching function, or by starting the pattern with
 (*NO_START_OPT). This slows down the matching process, but does ensure that
 callouts such as the example above are obeyed.
 </P>
 <br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
 <P>
 During matching, when PCRE2 reaches a callout point, the external function that
 is set in the match context is called (if it is set). This applies to both
 normal and DFA matching. The only argument to the callout function is a pointer
 to a <b>pcre2_callout_block</b> structure, which contains the following fields:
 <pre>
  uint32_t      <i>version</i>;
  uint32_t      <i>callout_number</i>;
  uint32_t      <i>capture_top</i>;
  uint32_t      <i>capture_last</i>;
  void         *<i>callout_data</i>;
  PCRE2_SIZE   *<i>offset_vector</i>;
  PCRE2_SPTR    <i>mark</i>;
  PCRE2_SPTR    <i>subject</i>;
  PCRE2_SIZE    <i>subject_length</i>;
  PCRE2_SIZE    <i>start_match</i>;
  PCRE2_SIZE    <i>current_position</i>;
  PCRE2_SIZE    <i>pattern_position</i>;
  PCRE2_SIZE    <i>next_item_length</i>;
 </pre>
 The <i>version</i> field contains the version number of the block format. The
 current version is 0. The version number will change in future if additional
 fields are added, but the intention is never to remove any of the existing
 fields.
 </P>
 <P>
 The <i>callout_number</i> field contains the number of the callout, as compiled
 into the pattern (that is, the number after ?C for manual callouts, and 255 for
 automatically generated callouts).
 </P>
 <P>
 The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
 (the "ovector") that was passed to the matching function in the match data
 block. When <b>pcre2_match()</b> is used, the contents can be inspected, in
 order to extract substrings that have been matched so far, in the same way as
 for extracting substrings after a match has completed. For the DFA matching
 function, this field is not useful.
 </P>
 <P>
 The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
 that were passed to the matching function.
 </P>
 <P>
 The <i>start_match</i> field normally contains the offset within the subject at
 which the current match attempt started. However, if the escape sequence \K
 has been encountered, this value is changed to reflect the modified starting
 point. If the pattern is not anchored, the callout function may be called
 several times from the same point in the pattern for different starting points
 in the subject.
 </P>
 <P>
 The <i>current_position</i> field contains the offset within the subject of the
 current match pointer.
 </P>
 <P>
 When <b>pcre2_match()</b> is used, the <i>capture_top</i> field contains one
 more than the number of the highest numbered captured substring so far. If no
 substrings have been captured, the value of <i>capture_top</i> is one. This is
 always the case when the DFA functions are used, because they do not support
 captured substrings.
 </P>
 <P>
 The <i>capture_last</i> field contains the number of the most recently captured
 substring. However, when a recursion exits, the value reverts to what it was
 outside the recursion, as do the values of all captured substrings. If no
 substrings have been captured, the value of <i>capture_last</i> is 0. This is
 always the case for the DFA matching functions.
 </P>
 <P>
 The <i>callout_data</i> field contains a value that is passed to a matching
 function specifically so that it can be passed back in callouts. It is set in
 the match context when the callout is set up by calling
 <b>pcre2_set_callout()</b> (see the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation).
 </P>
 <P>
 The <i>pattern_position</i> field contains the offset to the next item to be
 matched in the pattern string.
 </P>
 <P>
 The <i>next_item_length</i> field contains the length of the next item to be
 matched in the pattern string. When the callout immediately precedes an
 alternation bar, a closing parenthesis, or the end of the pattern, the length
 is zero. When the callout precedes an opening parenthesis, the length is that
 of the entire subpattern.
 </P>
 <P>
 The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
 help in distinguishing between different automatic callouts, which all have the
 same callout number. However, they are set for all callouts.
 </P>
 <P>
 In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
 the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
 (*THEN) item in the match, or NULL if no such items have been passed. Instances
 of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
 callouts from the DFA matching function this field always contains NULL.
 </P>
 <br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
 <P>
 The external callout function returns an integer to PCRE2. If the value is
 zero, matching proceeds as normal. If the value is greater than zero, matching
 fails at the current point, but the testing of other matching possibilities
 goes ahead, just as if a lookahead assertion had failed. If the value is less
 than zero, the match is abandoned, and the matching function returns the
 negative value.
 </P>
 <P>
 Negative values should normally be chosen from the set of PCRE2_ERROR_xxx
 values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
 failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
 functions; it will never be used by PCRE2 itself.
 </P>
 <br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
 University Computing Service
 <br>
 Cambridge CB2 3QH, England.
 <br>
 </P>
 <br><a name="SEC7" href="#TOC1">REVISION</a><br>
 <P>
 Last updated: 19 October 2014
 <br>
 Copyright &copy; 1997-2014 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
--- a/doc/html/pcre2demo.html
+++ b/doc/html/pcre2demo.html
@ -0,0 +1,443 @@
 <html>
 <head>
 <title>pcre2demo specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>pcre2demo man page</h1>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 <p>
 This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
 <ul>
 </ul>
 <PRE>
 /*************************************************
 *           PCRE2 DEMONSTRATION PROGRAM          *
 *************************************************/
 /* This is a demonstration program to illustrate a straightforward way of
 calling the PCRE2 regular expression library from a C program. See the
 pcre2sample documentation for a short discussion ("man pcre2sample" if you have
 the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
 incompatible with the original PCRE API.
 There are actually three libraries, each supporting a different code unit 
 width. This demonstration program uses the 8-bit library.
 In Unix-like environments, if PCRE2 is installed in your standard system
 libraries, you should be able to compile this program using this command:
 gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
 If PCRE2 is not installed in a standard place, it is likely to be installed
 with support for the pkg-config mechanism. If you have pkg-config, you can
 compile this program using this command:
 gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
 If you do not have pkg-config, you may have to use this:
 gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
  -R/usr/local/lib -lpcre2-8 -o pcre2demo
 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
 library files for PCRE2 are installed on your system. Only some operating
 systems (Solaris is one) use the -R option.
 Building under Windows:
 If you want to statically link this program against a non-dll .a file, you must
 define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
 the following line. */
 /* #define PCRE2_STATIC */
 /* This macro must be defined before including pcre2.h. For a program that uses 
 only one code unit width, it makes it possible to use generic function names 
 such as pcre2_compile(). */
 #define PCRE2_CODE_UNIT_WIDTH 8
 #include &lt;stdio.h&gt;
 #include &lt;string.h&gt;
 #include &lt;pcre2.h&gt;
 /**************************************************************************
 * Here is the program. The API includes the concept of "contexts" for     *
 * setting up unusual interface requirements for compiling and matching,   *
 * such as custom memory managers and non-standard newline definitions.    *
 * This program does not do any of this, so it makes no use of contexts,   *
 * always passing NULL where a context could be given.                     *
 **************************************************************************/
 int main(int argc, char **argv)
 {
 pcre2_code *re;
 PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
 PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
 PCRE2_SPTR name_table;
 int crlf_is_newline;
 int errornumber;
 int find_all;
 int i;
 int namecount;
 int name_entry_size;
 int rc;
 int utf8;
 uint32_t option_bits;
 uint32_t newline;
 PCRE2_SIZE erroroffset;
 PCRE2_SIZE *ovector;
 size_t subject_length;
 pcre2_match_data *match_data;
 /**************************************************************************
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
 * like Perl's /g option. We set the variable find_all to a non-zero value *
 * if the -g option is present. Apart from that, there must be exactly two *
 * arguments.                                                              *
 **************************************************************************/
 find_all = 0;
 for (i = 1; i &lt; argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
    else break;
  }
 /* After the options, we require exactly two arguments, which are the pattern,
 and the subject string. */
 if (argc - i != 2)
  {
  printf("Two arguments required: a regex and a subject string\n");
  return 1;
  }
 /* As pattern and subject are char arguments, they can be straightforwardly
 cast to PCRE2_SPTR as we are working in 8-bit code units. */
 pattern = (PCRE2_SPTR)argv[i];
 subject = (PCRE2_SPTR)argv[i+1];
 subject_length = strlen((char *)subject);
 /*************************************************************************
 * Now we are going to compile the regular expression pattern, and handle *
 * any errors that are detected.                                          *
 *************************************************************************/
 re = pcre2_compile(
  pattern,              /* the pattern */
  -1,                   /* indicates pattern is zero-terminated */ 
  0,                    /* default options */
  &amp;errornumber,         /* for error number */
  &amp;erroroffset,         /* for error offset */
  NULL);                /* use default compile context */
 /* Compilation failed: print the error message and exit. */
 if (re == NULL)
  {
  PCRE2_UCHAR buffer[256]; 
  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
  printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, 
    buffer);
  return 1;
  }
 /*************************************************************************
 * If the compilation succeeded, we call PCRE2 again, in order to do a    *
 * pattern match against the subject string. This does just ONE match. If *
 * further matching is needed, it will be done below. Before running the  *
 * match we must set up a match_data block for holding the result.        *
 *************************************************************************/
 /* Using this function ensures that the block is exactly the right size for
 the number of capturing parentheses in the pattern. */
 match_data = pcre2_match_data_create_from_pattern(re, NULL);
 rc = pcre2_match(
  re,                   /* the compiled pattern */
  subject,              /* the subject string */
  subject_length,       /* the length of the subject */
  0,                    /* start at offset 0 in the subject */
  0,                    /* default options */
  match_data,           /* block for storing the result */
  NULL);                /* use default match context */
 /* Matching failed: handle error cases */
 if (rc &lt; 0)
  {
  switch(rc)
    {
    case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
    /*
    Handle other special cases if you like
    */
    default: printf("Matching error %d\n", rc); break;
    }
  pcre2_match_data_free(match_data);   /* Release memory used for the match */
  pcre2_code_free(re);                 /* data and the compiled pattern. */
  return 1;
  }
 /* Match succeeded. Get a pointer to the output vector, where string offsets are
 stored. */
 ovector = pcre2_get_ovector_pointer(match_data);
 printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
 /*************************************************************************
 * We have found the first match within the subject string. If the output *
 * vector wasn't big enough, say so. Then output any substrings that were *
 * captured.                                                              *
 *************************************************************************/
 /* The output vector wasn't big enough. This should not happen, because we used 
 pcre2_match_data_create_from_pattern() above. */
 if (rc == 0)
  printf("ovector was not big enough for all the captured substrings\n");
 /* Show substrings stored in the output vector by number. Obviously, in a real
 application you might want to do things other than print them. */
 for (i = 0; i &lt; rc; i++)
  {
  PCRE2_SPTR substring_start = subject + ovector[2*i];
  size_t substring_length = ovector[2*i+1] - ovector[2*i];
  printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
  }
 /**************************************************************************
 * That concludes the basic part of this demonstration program. We have    *
 * compiled a pattern, and performed a single match. The code that follows *
 * shows first how to access named substrings, and then how to code for    *
 * repeated matches on the same subject.                                   *
 **************************************************************************/
 /* See if there are any named substrings, and if so, show them by name. First
 we have to extract the count of named parentheses from the pattern. */
 (void)pcre2_pattern_info(
  re,                   /* the compiled pattern */
  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  &amp;namecount);          /* where to put the answer */
 if (namecount &lt;= 0) printf("No named substrings\n"); else
  {
  PCRE2_SPTR tabptr;
  printf("Named substrings\n");
  /* Before we can access the substrings, we must extract the table for
  translating names to numbers, and the size of each entry in the table. */
  (void)pcre2_pattern_info(
    re,                       /* the compiled pattern */
    PCRE2_INFO_NAMETABLE,     /* address of the table */
    &amp;name_table);             /* where to put the answer */
  (void)pcre2_pattern_info(
    re,                       /* the compiled pattern */
    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
    &amp;name_entry_size);        /* where to put the answer */
  /* Now we can scan the table and, for each entry, print the number, the name,
  and the substring itself. In the 8-bit library the number is held in two 
  bytes, most significant first. */
  tabptr = name_table;
  for (i = 0; i &lt; namecount; i++)
    {
    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
    tabptr += name_entry_size;
    }
  }
 /*************************************************************************
 * If the "-g" option was given on the command line, we want to continue  *
 * to search for additional matches in the subject string, in a similar   *
 * way to the /g option in Perl. This turns out to be trickier than you   *
 * might think because of the possibility of matching an empty string.    *
 * What happens is as follows:                                            *
 *                                                                        *
 * If the previous match was NOT for an empty string, we can just start   *
 * the next match at the end of the previous one.                         *
 *                                                                        *
 * If the previous match WAS for an empty string, we can't do that, as it *
 * would lead to an infinite loop. Instead, a call of pcre2_match() is    *
 * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
 * first of these tells PCRE2 that an empty string at the start of the    *
 * subject is not a valid match; other possibilities must be tried. The   *
 * second flag restricts PCRE2 to one match attempt at the initial string *
 * position. If this match succeeds, an alternative to the empty string   *
 * match has been found, and we can print it and proceed round the loop,  *
 * advancing by the length of whatever was found. If this match does not  *
 * succeed, we still stay in the loop, advancing by just one character.   *
 * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
 * more than one byte.                                                    *
 *                                                                        *
 * However, there is a complication concerned with newlines. When the     *
 * newline convention is such that CRLF is a valid newline, we must       *
 * advance by two characters rather than one. The newline convention can  *
 * be set in the regex by (*CR), etc.; if not, we must find the default.  *
 *************************************************************************/
 if (!find_all)     /* Check for -g */
  {
  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
  pcre2_code_free(re);                /* for the match data and the pattern. */
  return 0;                           /* Exit the program. */
  }
 /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
 sequence. First, find the options with which the regex was compiled and extract
 the UTF state. */
 (void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &amp;option_bits);
 utf8 = (option_bits &amp; PCRE2_UTF) != 0;
 /* Now find the newline convention and see whether CRLF is a valid newline
 sequence. */
 (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
 crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
                  newline == PCRE2_NEWLINE_CRLF ||
                  newline == PCRE2_NEWLINE_ANYCRLF; 
 /* Loop for second and subsequent matches */
 for (;;)
  {
  uint32_t options = 0;                    /* Normally no options */
  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
  /* If the previous match was for an empty string, we are finished if we are
  at the end of the subject. Otherwise, arrange to run another match at the
  same point to see if a non-empty match can be found. */
  if (ovector[0] == ovector[1])
    {
    if (ovector[0] == subject_length) break;
    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
    }
  /* Run the next matching operation */
  rc = pcre2_match(
    re,                   /* the compiled pattern */
    subject,              /* the subject string */
    subject_length,       /* the length of the subject */
    start_offset,         /* starting offset in the subject */
    options,              /* options */
    match_data,           /* block for storing the result */
    NULL);                /* use default match context */
  /* This time, a result of NOMATCH isn't an error. If the value in "options"
  is zero, it just means we have found all possible matches, so the loop ends.
  Otherwise, it means we have failed to find a non-empty-string match at a
  point where there was a previous empty-string match. In this case, we do what
  Perl does: advance the matching position by one character, and continue. We
  do this by setting the "end of previous match" offset, because that is picked
  up at the top of the loop as the point at which to start again.
  There are two complications: (a) When CRLF is a valid newline sequence, and
  the current position is just before it, advance by an extra byte. (b)
  Otherwise we must ensure that we skip an entire UTF character if we are in
  UTF mode. */
  if (rc == PCRE2_ERROR_NOMATCH)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one code unit */
    if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
        start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
        subject[start_offset] == '\r' &amp;&amp;
        subject[start_offset + 1] == '\n')
      ovector[1] += 1;                          /* Advance by one more. */
    else if (utf8)                              /* Otherwise, ensure we */
      {                                         /* advance a whole UTF-8 */
      while (ovector[1] &lt; subject_length)       /* character. */
        {
        if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
        ovector[1] += 1;
        }
      }
    continue;    /* Go round the loop again */
    }
  /* Other matching errors are not recoverable. */
  if (rc &lt; 0)
    {
    printf("Matching error %d\n", rc);
    pcre2_match_data_free(match_data);
    pcre2_code_free(re);
    return 1;
    }
  /* Match succeeded */
  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
  /* The match succeeded, but the output vector wasn't big enough. This
  should not happen. */
  if (rc == 0)
    printf("ovector was not big enough for all the captured substrings\n");
  /* As before, show substrings stored in the output vector by number, and then
  also any named substrings. */
  for (i = 0; i &lt; rc; i++)
    {
    PCRE2_SPTR substring_start = subject + ovector[2*i];
    size_t substring_length = ovector[2*i+1] - ovector[2*i];
    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
    }
  if (namecount &lt;= 0) printf("No named substrings\n"); else
    {
    PCRE2_SPTR tabptr = name_table;
    printf("Named substrings\n");
    for (i = 0; i &lt; namecount; i++)
      {
      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
      tabptr += name_entry_size;
      }
    }
  }      /* End of loop to find second and subsequent matches */
 printf("\n");
 pcre2_match_data_free(match_data);
 pcre2_code_free(re);
 return 0;
 }
 /* End of pcre2demo.c */
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -0,0 +1,270 @@
 <html>
 <head>
 <title>pcre2unicode specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>pcre2unicode man page</h1>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 <p>
 This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
 <br><b>
 UNICODE AND UTF SUPPORT
 </b><br>
 <P>
 When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
 character properties and can process text strings in UTF-8, UTF-16, or UTF-32
 format (depending on the code unit width). By default, PCRE2 assumes that one
 code unit is one character. To process a pattern as a UTF string, where a
 character may require more than one code unit, you must call
 <a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
 with the PCRE2_UTF option flag, or the pattern must start with the sequence
 (*UTF). When either of these is the case, both the pattern and any subject
 strings that are matched against it are treated as UTF strings instead of
 strings of individual one-code-unit characters.
 </P>
 <P>
 If you build PCRE2 with Unicode support, the library will be bigger, but the
 additional run time overhead is limited to testing the PCRE2_UTF flag
 occasionally, so should not be very much.
 </P>
 <br><b>
 UNICODE PROPERTY SUPPORT
 </b><br>
 <P>
 When PCRE2 is built with Unicode support, the escape sequences \p{..},
 \P{..}, and \X can be used. The Unicode properties that can be tested are
 limited to the general category properties such as Lu for an upper case letter
 or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
 the derived properties Any and L&amp;. Full lists are given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 and
 <a href="pcre2syntax.html"><b>pcre2syntax</b></a>
 documentation. Only the short names for properties are supported. For example,
 \p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
 Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
 compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
 </b><br>
 <P>
 Codepoints less than 256 can be specified in patterns by either braced or
 unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
 values have to use braced sequences. Unbraced octal code points up to \777 are
 also recognized; larger ones can be coded using \o{...}.
 </P>
 <P>
 In UTF modes, repeat quantifiers apply to complete UTF characters, not to
 individual code units.
 </P>
 <P>
 In UTF modes, the dot metacharacter matches one UTF character instead of a
 single code unit.
 </P>
 <P>
 The escape sequence \C can be used to match a single code unit, in a UTF mode, 
 but its use can lead to some strange effects because it breaks up multi-unit
 characters (see the description of \C in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation). The use of \C is not supported in the alternative matching
 function <b>pcre2_dfa_exec()</b>, nor is it supported in UTF mode by the JIT
 optimization. If JIT optimization is requested for a UTF pattern that contains
 \C, it will not succeed, and so the matching will be carried out by the normal
 interpretive function.
 </P>
 <P>
 The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
 characters of any code value, but, by default, the characters that PCRE2
 recognizes as digits, spaces, or word characters remain the same set as in
 non-UTF mode, all with code points less than 256. This remains true even when
 PCRE2 is built to include Unicode support, because to do otherwise would slow
 down matching in many common cases. Note that this also applies to \b
 and \B, because they are defined in terms of \w and \W. If you want
 to test for a wider sense of, say, "digit", you can use explicit Unicode
 property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option,
 the way that the character escapes work is changed so that Unicode properties
 are used to determine which characters match. There are more details in the
 section on
 <a href="pcre2pattern.html#genericchartypes">generic character types</a>
 in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
 <P>
 Similarly, characters that match the POSIX named character classes are all
 low-valued characters, unless the PCRE2_UCP option is set.
 </P>
 <P>
 However, the special horizontal and vertical white space matching escapes (\h,
 \H, \v, and \V) do match all the appropriate Unicode characters, whether or
 not PCRE2_UCP is set.
 </P>
 <P>
 Case-insensitive matching in UTF mode makes use of Unicode properties. A few
 Unicode characters such as Greek sigma have more than two codepoints that are
 case-equivalent, and these are treated as such.
 </P>
 <br><b>
 VALIDITY OF UTF STRINGS
 </b><br>
 <P>
 When the PCRE2_UTF option is set, the strings passed as patterns and subjects
 are (by default) checked for validity on entry to the relevant functions. 
 If an invalid UTF string is passed, an error return is given. 
 </P>
 <P>
 UTF-16 and UTF-32 strings can indicate their endianness by a special code known
 as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
 strings to be in host byte order.
 </P>
 <P>
 The entire string is checked before any other processing takes place. In
 addition to checking the format of the string, there is a check to ensure that
 all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
 The so-called "non-character" code points are not excluded because Unicode
 corrigendum #9 makes it clear that they should not be.
 </P>
 <P>
 Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
 where they are used in pairs to encode code points with values greater than
 0xFFFF. The code points that are encoded by UTF-16 pairs are available
 independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
 surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
 UTF-32.)
 </P>
 <P>
 In some situations, you may already know that your strings are valid, and
 therefore want to skip these checks in order to improve performance, for
 example in the case of a long subject string that is being scanned repeatedly.
 If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
 assumes that the pattern or subject it is given (respectively) contains only
 valid UTF code unit sequences.
 </P>
 <P>
 Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
 the pattern; it does not also apply to subject strings. If you want to disable
 the check for a subject string you must pass this option to <b>pcre2_exec()</b>
 or <b>pcre2_dfa_exec()</b>.
 </P>
 <P>
 If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
 is undefined and your program may crash or loop indefinitely.
 <a name="utf8strings"></a></P>
 <br><b>
 Errors in UTF-8 strings
 </b><br>
 <P>
 The following negative error codes are given for invalid UTF-8 strings:
 <pre>
  PCRE2_ERROR_UTF8_ERR1
  PCRE2_ERROR_UTF8_ERR2
  PCRE2_ERROR_UTF8_ERR3
  PCRE2_ERROR_UTF8_ERR4
  PCRE2_ERROR_UTF8_ERR5
 </pre>
 The string ends with a truncated UTF-8 character; the code specifies how many
 bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
 no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
 allows for up to 6 bytes, and this is checked first; hence the possibility of
 4 or 5 missing bytes.
 <pre>
  PCRE2_ERROR_UTF8_ERR6
  PCRE2_ERROR_UTF8_ERR7
  PCRE2_ERROR_UTF8_ERR8
  PCRE2_ERROR_UTF8_ERR9
  PCRE2_ERROR_UTF8_ERR10
 </pre>
 The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
 character do not have the binary value 0b10 (that is, either the most
 significant bit is 0, or the next bit is 1).
 <pre>
  PCRE2_ERROR_UTF8_ERR11
  PCRE2_ERROR_UTF8_ERR12
 </pre>
 A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
 these code points are excluded by RFC 3629.
 <pre>
  PCRE2_ERROR_UTF8_ERR13
 </pre>
 A 4-byte character has a value greater than 0x10ffff; these code points are
 excluded by RFC 3629.
 <pre>
  PCRE2_ERROR_UTF8_ERR14
 </pre>
 A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
 code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
 from UTF-8.
 <pre>
  PCRE2_ERROR_UTF8_ERR15
  PCRE2_ERROR_UTF8_ERR16
  PCRE2_ERROR_UTF8_ERR17
  PCRE2_ERROR_UTF8_ERR18
  PCRE2_ERROR_UTF8_ERR19
 </pre>
 A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
 value that can be represented by fewer bytes, which is invalid. For example,
 the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
 one byte.
 <pre>
  PCRE2_ERROR_UTF8_ERR20
 </pre>
 The two most significant bits of the first byte of a character have the binary
 value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
 byte can only validly occur as the second or subsequent byte of a multi-byte
 character.
 <pre>
  PCRE2_ERROR_UTF8_ERR21
 </pre>
 The first byte of a character has the value 0xfe or 0xff. These values can
 never occur in a valid UTF-8 string.
 <a name="utf16strings"></a></P>
 <br><b>
 Errors in UTF-16 strings
 </b><br>
 <P>
 The following negative error codes are given for invalid UTF-16 strings:
 <pre>
  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
 <a name="utf32strings"></a></PRE>
 </P>
 <br><b>
 Errors in UTF-32 strings
 </b><br>
 <P>
 The following negative error codes are given for invalid UTF-32 strings:
 <pre>
  PCRE2_ERROR_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
 </PRE>
 </P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
 University Computing Service
 <br>
 Cambridge CB2 3QH, England.
 <br>
 </P>
 <br><b>
 REVISION
 </b><br>
 <P>
 Last updated: 16 September 2014
 <br>
 Copyright &copy; 1997-2014 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
--- a/doc/index.html.src
+++ b/doc/index.html.src
@ -0,0 +1,177 @@
 <html>
 <!-- This is a manually maintained file that is the root of the HTML version of 
     the PCRE2 documentation. When the HTML documents are built from the man 
     page versions, the entire doc/html directory is emptied, this file is then 
     copied into doc/html/index.html, and the remaining files therein are 
     created by the 132html script.
 -->      
 <head>
 <title>PCRE2 specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
 <p>
 The HTML documentation for PCRE2 consists of a number of pages that are listed
 below in alphabetical order. If you are new to PCRE2, please read the first one
 first.
 </p>
 <table>
 <tr><td><a href="pcre2.html">pcre2</a></td>
    <td>&nbsp;&nbsp;Introductory page</td></tr>
 <tr><td><a href="pcre2-config.html">pcre2-config</a></td>
    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
 <tr><td><a href="pcre2api.html">pcre2api</a></td>
    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
 <tr><td><a href="pcre2build.html">pcre2build</a></td>
    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
 <tr><td><a href="pcre2callout.html">pcre2callout</a></td>
    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
 <tr><td><a href="pcre2compat.html">pcre2compat</a></td>
    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
 <tr><td><a href="pcre2demo.html">pcre2demo</a></td>
    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
 <tr><td><a href="pcre2grep.html">pcre2grep</a></td>
    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
 <tr><td><a href="pcre2jit.html">pcre2jit</a></td>
    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
 <tr><td><a href="pcre2limits.html">pcre2limits</a></td>
    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
 <tr><td><a href="pcre2matching.html">pcre2matching</a></td>
    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
 <tr><td><a href="pcre2partial.html">pcre2partial</a></td>
    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
 <tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
 <tr><td><a href="pcre2perform.html">pcre2perform</a></td>
    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
 <tr><td><a href="pcre2posix.html">pcre2posix</a></td>
    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
 <tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
 <tr><td><a href="pcre2sample.html">pcre2sample</a></td>
    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
 <tr><td><a href="pcre2stack.html">pcre2stack</a></td>
    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
 <tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
 <tr><td><a href="pcre2test.html">pcre2test</a></td>
    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
 <tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
 </table>
 <p>
 There are also individual pages that summarize the interface for each function
 in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
 functions.
 </p>
 <table>    
 <tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
 <tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
 <tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
 <tr><td><a href="pcre2_config.html">pcre2_config</a></td>
    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
 <tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
 <tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
 <tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
 <tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (Perl compatible)</td></tr>
 <tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
    <td>&nbsp;&nbsp;Free study data</td></tr>
 <tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
 <tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
 <tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
 <tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
 <tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
 <tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
 <tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
 <tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
 <tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
 <tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
 <tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
 <tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
 <tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
 <tr><td><a href="pcre2_study.html">pcre2_study</a></td>
    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
 <tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
 <tr><td><a href="pcre2_version.html">pcre2_version</a></td>
    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
 </table>
 </html>
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -214,7 +214,7 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_pattern_info(const pcre2 *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
 .sp
 .B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP, PCRE2_SIZE \fIlength\fP);
-.sp
+.fi
 .
 .
 .SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
--- a/doc/pcre2demo.3
+++ b/doc/pcre2demo.3
@ -0,0 +1,441 @@
 .\" Start example.
 .de EX
 .  nr mE \\n(.f
 .  nf
 .  nh
 .  ft CW
 ..
 .
 .
 .\" End example.
 .de EE
 .  ft \\n(mE
 .  fi
 .  hy \\n(HY
 ..
 .
 .EX
 /*************************************************
 *           PCRE2 DEMONSTRATION PROGRAM          *
 *************************************************/
 /* This is a demonstration program to illustrate a straightforward way of
 calling the PCRE2 regular expression library from a C program. See the
 pcre2sample documentation for a short discussion ("man pcre2sample" if you have
 the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
 incompatible with the original PCRE API.
 There are actually three libraries, each supporting a different code unit 
 width. This demonstration program uses the 8-bit library.
 In Unix-like environments, if PCRE2 is installed in your standard system
 libraries, you should be able to compile this program using this command:
 gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
 If PCRE2 is not installed in a standard place, it is likely to be installed
 with support for the pkg-config mechanism. If you have pkg-config, you can
 compile this program using this command:
 gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
 If you do not have pkg-config, you may have to use this:
 gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
  -R/usr/local/lib -lpcre2-8 -o pcre2demo
 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
 library files for PCRE2 are installed on your system. Only some operating
 systems (Solaris is one) use the -R option.
 Building under Windows:
 If you want to statically link this program against a non-dll .a file, you must
 define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
 the following line. */
 /* #define PCRE2_STATIC */
 /* This macro must be defined before including pcre2.h. For a program that uses 
 only one code unit width, it makes it possible to use generic function names 
 such as pcre2_compile(). */
 #define PCRE2_CODE_UNIT_WIDTH 8
 #include <stdio.h>
 #include <string.h>
 #include <pcre2.h>
 /**************************************************************************
 * Here is the program. The API includes the concept of "contexts" for     *
 * setting up unusual interface requirements for compiling and matching,   *
 * such as custom memory managers and non-standard newline definitions.    *
 * This program does not do any of this, so it makes no use of contexts,   *
 * always passing NULL where a context could be given.                     *
 **************************************************************************/
 int main(int argc, char **argv)
 {
 pcre2_code *re;
 PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
 PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
 PCRE2_SPTR name_table;
 int crlf_is_newline;
 int errornumber;
 int find_all;
 int i;
 int namecount;
 int name_entry_size;
 int rc;
 int utf8;
 uint32_t option_bits;
 uint32_t newline;
 PCRE2_SIZE erroroffset;
 PCRE2_SIZE *ovector;
 size_t subject_length;
 pcre2_match_data *match_data;
 /**************************************************************************
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
 * like Perl's /g option. We set the variable find_all to a non-zero value *
 * if the -g option is present. Apart from that, there must be exactly two *
 * arguments.                                                              *
 **************************************************************************/
 find_all = 0;
 for (i = 1; i < argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
    else break;
  }
 /* After the options, we require exactly two arguments, which are the pattern,
 and the subject string. */
 if (argc - i != 2)
  {
  printf("Two arguments required: a regex and a subject string\en");
  return 1;
  }
 /* As pattern and subject are char arguments, they can be straightforwardly
 cast to PCRE2_SPTR as we are working in 8-bit code units. */
 pattern = (PCRE2_SPTR)argv[i];
 subject = (PCRE2_SPTR)argv[i+1];
 subject_length = strlen((char *)subject);
 /*************************************************************************
 * Now we are going to compile the regular expression pattern, and handle *
 * any errors that are detected.                                          *
 *************************************************************************/
 re = pcre2_compile(
  pattern,              /* the pattern */
  -1,                   /* indicates pattern is zero-terminated */ 
  0,                    /* default options */
  &errornumber,         /* for error number */
  &erroroffset,         /* for error offset */
  NULL);                /* use default compile context */
 /* Compilation failed: print the error message and exit. */
 if (re == NULL)
  {
  PCRE2_UCHAR buffer[256]; 
  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
  printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, 
    buffer);
  return 1;
  }
 /*************************************************************************
 * If the compilation succeeded, we call PCRE2 again, in order to do a    *
 * pattern match against the subject string. This does just ONE match. If *
 * further matching is needed, it will be done below. Before running the  *
 * match we must set up a match_data block for holding the result.        *
 *************************************************************************/
 /* Using this function ensures that the block is exactly the right size for
 the number of capturing parentheses in the pattern. */
 match_data = pcre2_match_data_create_from_pattern(re, NULL);
 rc = pcre2_match(
  re,                   /* the compiled pattern */
  subject,              /* the subject string */
  subject_length,       /* the length of the subject */
  0,                    /* start at offset 0 in the subject */
  0,                    /* default options */
  match_data,           /* block for storing the result */
  NULL);                /* use default match context */
 /* Matching failed: handle error cases */
 if (rc < 0)
  {
  switch(rc)
    {
    case PCRE2_ERROR_NOMATCH: printf("No match\en"); break;
    /*
    Handle other special cases if you like
    */
    default: printf("Matching error %d\en", rc); break;
    }
  pcre2_match_data_free(match_data);   /* Release memory used for the match */
  pcre2_code_free(re);                 /* data and the compiled pattern. */
  return 1;
  }
 /* Match succeeded. Get a pointer to the output vector, where string offsets are
 stored. */
 ovector = pcre2_get_ovector_pointer(match_data);
 printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
 /*************************************************************************
 * We have found the first match within the subject string. If the output *
 * vector wasn't big enough, say so. Then output any substrings that were *
 * captured.                                                              *
 *************************************************************************/
 /* The output vector wasn't big enough. This should not happen, because we used 
 pcre2_match_data_create_from_pattern() above. */
 if (rc == 0)
  printf("ovector was not big enough for all the captured substrings\en");
 /* Show substrings stored in the output vector by number. Obviously, in a real
 application you might want to do things other than print them. */
 for (i = 0; i < rc; i++)
  {
  PCRE2_SPTR substring_start = subject + ovector[2*i];
  size_t substring_length = ovector[2*i+1] - ovector[2*i];
  printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
  }
 /**************************************************************************
 * That concludes the basic part of this demonstration program. We have    *
 * compiled a pattern, and performed a single match. The code that follows *
 * shows first how to access named substrings, and then how to code for    *
 * repeated matches on the same subject.                                   *
 **************************************************************************/
 /* See if there are any named substrings, and if so, show them by name. First
 we have to extract the count of named parentheses from the pattern. */
 (void)pcre2_pattern_info(
  re,                   /* the compiled pattern */
  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  &namecount);          /* where to put the answer */
 if (namecount <= 0) printf("No named substrings\en"); else
  {
  PCRE2_SPTR tabptr;
  printf("Named substrings\en");
  /* Before we can access the substrings, we must extract the table for
  translating names to numbers, and the size of each entry in the table. */
  (void)pcre2_pattern_info(
    re,                       /* the compiled pattern */
    PCRE2_INFO_NAMETABLE,     /* address of the table */
    &name_table);             /* where to put the answer */
  (void)pcre2_pattern_info(
    re,                       /* the compiled pattern */
    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
    &name_entry_size);        /* where to put the answer */
  /* Now we can scan the table and, for each entry, print the number, the name,
  and the substring itself. In the 8-bit library the number is held in two 
  bytes, most significant first. */
  tabptr = name_table;
  for (i = 0; i < namecount; i++)
    {
    int n = (tabptr[0] << 8) | tabptr[1];
    printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
    tabptr += name_entry_size;
    }
  }
 /*************************************************************************
 * If the "-g" option was given on the command line, we want to continue  *
 * to search for additional matches in the subject string, in a similar   *
 * way to the /g option in Perl. This turns out to be trickier than you   *
 * might think because of the possibility of matching an empty string.    *
 * What happens is as follows:                                            *
 *                                                                        *
 * If the previous match was NOT for an empty string, we can just start   *
 * the next match at the end of the previous one.                         *
 *                                                                        *
 * If the previous match WAS for an empty string, we can't do that, as it *
 * would lead to an infinite loop. Instead, a call of pcre2_match() is    *
 * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
 * first of these tells PCRE2 that an empty string at the start of the    *
 * subject is not a valid match; other possibilities must be tried. The   *
 * second flag restricts PCRE2 to one match attempt at the initial string *
 * position. If this match succeeds, an alternative to the empty string   *
 * match has been found, and we can print it and proceed round the loop,  *
 * advancing by the length of whatever was found. If this match does not  *
 * succeed, we still stay in the loop, advancing by just one character.   *
 * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
 * more than one byte.                                                    *
 *                                                                        *
 * However, there is a complication concerned with newlines. When the     *
 * newline convention is such that CRLF is a valid newline, we must       *
 * advance by two characters rather than one. The newline convention can  *
 * be set in the regex by (*CR), etc.; if not, we must find the default.  *
 *************************************************************************/
 if (!find_all)     /* Check for -g */
  {
  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
  pcre2_code_free(re);                /* for the match data and the pattern. */
  return 0;                           /* Exit the program. */
  }
 /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
 sequence. First, find the options with which the regex was compiled and extract
 the UTF state. */
 (void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
 utf8 = (option_bits & PCRE2_UTF) != 0;
 /* Now find the newline convention and see whether CRLF is a valid newline
 sequence. */
 (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
 crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
                  newline == PCRE2_NEWLINE_CRLF ||
                  newline == PCRE2_NEWLINE_ANYCRLF; 
 /* Loop for second and subsequent matches */
 for (;;)
  {
  uint32_t options = 0;                    /* Normally no options */
  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
  /* If the previous match was for an empty string, we are finished if we are
  at the end of the subject. Otherwise, arrange to run another match at the
  same point to see if a non-empty match can be found. */
  if (ovector[0] == ovector[1])
    {
    if (ovector[0] == subject_length) break;
    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
    }
  /* Run the next matching operation */
  rc = pcre2_match(
    re,                   /* the compiled pattern */
    subject,              /* the subject string */
    subject_length,       /* the length of the subject */
    start_offset,         /* starting offset in the subject */
    options,              /* options */
    match_data,           /* block for storing the result */
    NULL);                /* use default match context */
  /* This time, a result of NOMATCH isn't an error. If the value in "options"
  is zero, it just means we have found all possible matches, so the loop ends.
  Otherwise, it means we have failed to find a non-empty-string match at a
  point where there was a previous empty-string match. In this case, we do what
  Perl does: advance the matching position by one character, and continue. We
  do this by setting the "end of previous match" offset, because that is picked
  up at the top of the loop as the point at which to start again.
  There are two complications: (a) When CRLF is a valid newline sequence, and
  the current position is just before it, advance by an extra byte. (b)
  Otherwise we must ensure that we skip an entire UTF character if we are in
  UTF mode. */
  if (rc == PCRE2_ERROR_NOMATCH)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one code unit */
    if (crlf_is_newline &&                      /* If CRLF is newline & */
        start_offset < subject_length - 1 &&    /* we are at CRLF, */
        subject[start_offset] == '\er' &&
        subject[start_offset + 1] == '\en')
      ovector[1] += 1;                          /* Advance by one more. */
    else if (utf8)                              /* Otherwise, ensure we */
      {                                         /* advance a whole UTF-8 */
      while (ovector[1] < subject_length)       /* character. */
        {
        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
        ovector[1] += 1;
        }
      }
    continue;    /* Go round the loop again */
    }
  /* Other matching errors are not recoverable. */
  if (rc < 0)
    {
    printf("Matching error %d\en", rc);
    pcre2_match_data_free(match_data);
    pcre2_code_free(re);
    return 1;
    }
  /* Match succeeded */
  printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]);
  /* The match succeeded, but the output vector wasn't big enough. This
  should not happen. */
  if (rc == 0)
    printf("ovector was not big enough for all the captured substrings\en");
  /* As before, show substrings stored in the output vector by number, and then
  also any named substrings. */
  for (i = 0; i < rc; i++)
    {
    PCRE2_SPTR substring_start = subject + ovector[2*i];
    size_t substring_length = ovector[2*i+1] - ovector[2*i];
    printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
    }
  if (namecount <= 0) printf("No named substrings\en"); else
    {
    PCRE2_SPTR tabptr = name_table;
    printf("Named substrings\en");
    for (i = 0; i < namecount; i++)
      {
      int n = (tabptr[0] << 8) | tabptr[1];
      printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
      tabptr += name_entry_size;
      }
    }
  }      /* End of loop to find second and subsequent matches */
 printf("\en");
 pcre2_match_data_free(match_data);
 pcre2_code_free(re);
 return 0;
 }
 /* End of pcre2demo.c */
 .EE
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -154,7 +154,7 @@ Do not output the version number of \fBpcre2test\fP at the start of execution.
 \fB-S\fP \fIsize\fP
 On Unix-like systems, set the size of the run-time stack to \fIsize\fP
 megabytes.
-.TP10
+.TP 10
 \fB-subject\fP \fImodifier-list\fP
 Behave as if each subject line contains the given modifiers.
 .TP 10
@ -366,7 +366,7 @@ include a closing square bracket in the characters, code it as \ex5D.
 A backslash followed by an equals sign marks the end of the subject string and
 the start of a modifier list. For example:
 .sp
-  abc\=notbol,notempty
+  abc\e=notbol,notempty
 .sp
 A backslash followed by any other non-alphanumeric character just escapes that
 character. A backslash followed by anything else causes an error. However, if
@ -746,7 +746,7 @@ the actual match are indicated in the output by '<' or '>' characters
 underneath them. Here is an example:
 .sp
  /(?<=pqr)abc(?=xyz)/
-      123pqrabcxyz456\=allusedtext
+      123pqrabcxyz456\e=allusedtext
   0: pqrabcxyz
      <<<   >>>
 .sp
@ -789,7 +789,7 @@ The \fBcopy\fP and \fBget\fP modifiers can be used to test the
 They can be given more than once, and each can specify a group name or number,
 for example:
 .sp
-   abcd\=copy=1,copy=3,get=G1
+   abcd\e=copy=1,copy=3,get=G1
 .sp
 If the \fB#subject\fP command is used to set default copy and get lists, these
 can be unset by specifying a negative number for numbered groups and an empty
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
--- a/src/pcre2demo.c
+++ b/src/pcre2demo.c
@ -420,4 +420,4 @@ pcre2_code_free(re);
 return 0;
 }
-/* End of pcredemo.c */
+/* End of pcre2demo.c */
		`@ -0,0 +1 @@`
							`This is a placeholder README file for a work in progress.`