Documentation scripts

2014-09-23 11:35:51 +00:00 · 2014-09-23 11:35:51 +00:00 · d5495a30f4
commit d5495a30f4
parent a625f0ea01
19 changed files with 10412 additions and 6 deletions
--- a/313
+++ b/313
@ -0,0 +1,313 @@
+#! /usr/bin/perl -w
+
+# Script to turn PCRE2 man pages into HTML
+
+
+# Subroutine to handle font changes and other escapes
+
+sub do_line {
+# Convert one line of man page text into HTML and return the result. Angle
+# brackets become numeric entities, \fI...\fR and \fB...\fR font runs (also
+# when closed by \fP) become <i> and <b> elements, the troff escape \e
+# becomes a backslash, and "(c)" after "Copyright " becomes &copy;.
+my($text) = @_;
+
+# The angle brackets must be handled first, so that the tags generated
+# below are not themselves converted.
+
+$text =~ s{<}{&#60;}g;
+$text =~ s{>}{&#62;}g;
+
+# Font changes
+
+$text =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
+$text =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;
+
+# Remaining escapes
+
+$text =~ s/\\e/\\/g;
+$text =~ s{(?<=Copyright )\(c\)}{&copy;}g;
+
+return $text;
+}
+
+# Subroutine to ensure not in a paragraph
+
+sub end_para {
+# If a paragraph is open, write its closing tag to TEMP, preceded by the
+# closing tag of a literal section when one is open. In all cases the
+# paragraph, literal and "text written" flags are cleared.
+if ($inpara)
+  {
+  my $closing = $inpre ? "</PRE>\n</P>\n" : "</P>\n";
+  print TEMP $closing;
+  }
+($inpara, $inpre) = (0, 0);
+$wrotetext = 0;
+}
+
+# Subroutine to start a new paragraph
+
+sub new_para {
+# Open a fresh paragraph: whatever paragraph is currently open is closed
+# first, then the opening tag is written to TEMP and the "in paragraph"
+# flag is set.
+end_para();
+print TEMP "<P>\n";
+$inpara = 1;
+}
+
+
+# Main program
+
+# State flags used by the main loop and the paragraph subroutines:
+#   $innf       inside a .nf/.fi (no-fill) section
+#   $inpara     a <P> is currently open in the output
+#   $inpre      a <pre> is currently open in the output
+#   $wrotetext  text has been written since the last paragraph ended
+#   $toc        a table of contents was requested with -toc
+#   $ref        number to give the next section in the table of contents
+
+$innf = 0;
+$inpara = 0;
+$inpre = 0;
+$wrotetext = 0;
+$toc = 0;
+$ref = 1;
+
+# Process leading options. Only -toc is recognized; any other argument that
+# starts with a hyphen is silently discarded.
+
+while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
+  {
+  $toc = 1 if $ARGV[0] eq "-toc";
+  shift;
+  }
+
+# Initial output to STDOUT. The first remaining argument is the page name.
+
+print <<End ;
+<html>
+<head>
+<title>$ARGV[0] specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>$ARGV[0] man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+End
+
+print "<ul>\n" if ($toc);
+
+# The body of the page is accumulated in a temporary file so that the table
+# of contents can be written to STDOUT as the sections are encountered.
+# Opening an undefined file name in read/write mode creates an anonymous
+# temporary file. Unlike a file with a predictable name in /tmp, this cannot
+# be prepared in advance by another user, and nothing is left behind if
+# processing is abandoned.
+
+open(TEMP, "+>", undef) || die "Can't open temporary file for output: $!\n";
+
+while (<STDIN>)
+  {
+  # Handle lines beginning with a dot
+
+  if (/^\./)
+    {
+    # Some of the PCRE2 man pages used to contain instances of .br. However,
+    # they should have all been removed because they cause trouble in some
+    # (other) automated systems that translate man pages to HTML. Complain if
+    # we find .br or .in (another macro that is deprecated).
+
+    if (/^\.br/ || /^\.in/)
+      {
+      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
+      print STDERR "*** $_\n";
+      die "*** Processing abandoned\n";
+      }
+
+    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
+
+    elsif (/^\.nf/)
+      {
+      $innf = 1;
+      }
+
+    elsif (/^\.fi/)
+      {
+      $innf = 0;
+      }
+
+    # Handling .sp is subtle. If it is inside a literal section, do nothing if
+    # the next line is a non literal text line; similarly, if not inside a
+    # literal section, do nothing if a literal follows, unless we are inside
+    # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
+    # literal sections will do the spacing. Always skip if no previous output.
+
+    elsif (/^\.sp/)
+      {
+      if ($wrotetext)
+        {
+        $_ = <STDIN>;
+        if ($inpre)
+          {
+          print TEMP "\n" if (/^[\s.]/);
+          }
+        else
+          {
+          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
+          }
+        redo;    # Now process the lookahead line we just read
+        }
+      }
+    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
+      {
+      &new_para();
+      }
+    elsif (/^\.SH\s*("?)(.*)\1/)
+      {
+      # Ignore the NAME section, including the line that follows the heading
+      if ($2 =~ /^NAME\b/)
+        {
+        <STDIN>;
+        next;
+        }
+
+      &end_para();
+      my($title) = &do_line($2);
+      if ($toc)
+        {
+        # The title is passed as an argument for %s rather than being
+        # interpolated into the format, so that a percent sign in a heading
+        # cannot be taken as a conversion. Each call is given exactly as
+        # many arguments as its format has conversions.
+
+        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
+          $ref, $ref, $title);
+        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
+          $ref, $title);
+        $ref++;
+        }
+      else
+        {
+        print TEMP "<br><b>\n$title\n</b><br>\n";
+        }
+      }
+    elsif (/^\.SS\s*("?)(.*)\1/)
+      {
+      &end_para();
+      my($title) = &do_line($2);
+      print TEMP "<br><b>\n$title\n</b><br>\n";
+      }
+    elsif (/^\.B\s*(.*)/)
+      {
+      &new_para() if (!$inpara);
+      $_ = &do_line($1);
+      s/"(.*?)"/$1/g;      # Remove the quotes around quoted macro arguments
+      print TEMP "<b>$_</b>\n";
+      $wrotetext = 1;
+      }
+    elsif (/^\.I\s*(.*)/)
+      {
+      &new_para() if (!$inpara);
+      $_ = &do_line($1);
+      s/"(.*?)"/$1/g;      # Remove the quotes around quoted macro arguments
+      print TEMP "<i>$_</i>\n";
+      $wrotetext = 1;
+      }
+
+    # A comment that starts "HREF" takes the next line as a name that
+    # is turned into a hyperlink, using the text given, which might be
+    # in a special font. If it ends in () or (digits) or punctuation, they
+    # aren't part of the link.
+
+    elsif (/^\.\\"\s*HREF/)
+      {
+      $_=<STDIN>;
+      chomp;
+      $_ = &do_line($_);
+      $_ =~ s/\s+$//;
+      $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
+      print TEMP "<a href=\"$1.html\">$_</a>\n";
+      }
+
+    # A comment that starts "HTML" inserts literal HTML
+
+    elsif (/^\.\\"\s*HTML\s*(.*)/)
+      {
+      print TEMP $1;
+      }
+
+    # A comment that starts < inserts that HTML at the end of the
+    # *next* input line - so as not to get a newline between them.
+
+    elsif (/^\.\\"\s*(<.*>)/)
+      {
+      my($markup) = $1;
+      $_=<STDIN>;
+      chomp;
+      $_ = &do_line($_);
+      $_ =~ s/\s+$//;
+      print TEMP "$_$markup\n";
+      }
+
+    # A comment that starts JOIN joins the next two lines together, with one
+    # space between them. Then that line is processed. This is used in some
+    # displays where two lines are needed for the "man" version. JOINSH works
+    # the same, except that it assumes this is a shell command, so removes
+    # continuation backslashes.
+
+    elsif (/^\.\\"\s*JOIN(SH)?/)
+      {
+      my($one,$two);
+      $one = <STDIN>;
+      $two = <STDIN>;
+      $one =~ s/\s*\\e\s*$// if (defined($1));
+      chomp($one);
+      $two =~ s/^\s+//;
+      $_ = "$one $two";
+      redo;            # Process the joined lines
+      }
+
+    # .EX/.EE are used in the pcredemo page to bracket the entire program,
+    # which is unmodified except for turning backslash into "\e". The lines
+    # are copied until .EE (or the end of the input) is reached, and then the
+    # literal section is closed again.
+
+    elsif (/^\.EX\s*$/)
+      {
+      print TEMP "<PRE>\n";
+      while (<STDIN>)
+        {
+        last if /^\.EE\s*$/;
+        s/\\e/\\/g;
+        s/&/&amp;/g;
+        s/</&lt;/g;
+        s/>/&gt;/g;
+        print TEMP;
+        }
+      print TEMP "</PRE>\n";
+      }
+
+    # Ignore anything not recognized
+
+    next;
+    }
+
+  # Line does not begin with a dot. Replace blank lines with new paragraphs
+
+  if (/^\s*$/)
+    {
+    &end_para() if ($wrotetext);
+    next;
+    }
+
+  # Convert fonts changes and output an ordinary line. Ensure that indented
+  # lines are marked as literal.
+
+  $_ = &do_line($_);
+  &new_para() if (!$inpara);
+
+  if (/^\s/)
+    {
+    if (!$inpre)
+      {
+      print TEMP "<pre>\n";
+      $inpre = 1;
+      }
+    }
+  elsif ($inpre)
+    {
+    print TEMP "</pre>\n";
+    $inpre = 0;
+    }
+
+  # Add <br> to the end of a non-literal line if we are within .nf/.fi
+
+  $_ .= "<br>\n" if (!$inpre && $innf);
+
+  print TEMP;
+  $wrotetext = 1;
+  }
+
+# The TOC, if present, will have been written - terminate it
+
+print "</ul>\n" if ($toc);
+
+# Copy the remainder to the standard output. The temporary file is open for
+# both writing and reading, so it only needs to be rewound.
+
+seek(TEMP, 0, 0) || die "Can't rewind temporary file: $!\n";
+
+print while (<TEMP>);
+
+print <<End ;
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+End
+
+# The anonymous temporary file disappears when it is closed
+
+close(TEMP);
+
+# End
--- a/67
+++ b/67
@ -0,0 +1,67 @@
+#! /usr/bin/perl
+
+# A script to scan PCRE2's man pages to check for typos in the control
+# sequences. I use only a small set of the available repertoire, so it is 
+# straightforward to check that nothing else has slipped in by mistake. This
+# script should be called in the doc directory.
+
+# The exit code: set to 1 as soon as any problem is reported
+
+$yield = 0;
+
+while (scalar(@ARGV) > 0)
+  {
+  $line = 0;
+  $file = shift @ARGV;
+
+  # The three-argument form of open is used so that a name containing
+  # characters such as ">" or "|" is only ever treated as a file to read.
+
+  open (IN, "<", $file) || die "Failed to open $file: $!\n";
+
+  while (<IN>)
+    {
+    $line++;
+
+    # Messages are written with print, not printf, because they contain the
+    # file name, which must not be interpreted as a format.
+
+    if (/^\s*$/)
+      {
+      print "Empty line $line of $file\n";
+      $yield = 1;
+      }
+
+    # A control line must match one of the permitted forms in full
+
+    elsif (/^\./)
+      {
+      if (!/^\.\s*$|
+            ^\.B\s+\S|
+            ^\.TH\s\S|
+            ^\.SH\s\S|
+            ^\.SS\s\S|
+            ^\.TP(?:\s?\d+)?\s*$|
+            ^\.SM\s*$|
+            ^\.br\s*$|
+            ^\.rs\s*$|
+            ^\.sp\s*$|
+            ^\.nf\s*$|
+            ^\.fi\s*$|
+            ^\.P\s*$|
+            ^\.PP\s*$|
+            ^\.\\"(?:\ HREF)?\s*$|
+            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
+            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
+            ^\.\\"\s<\/a>\s*$|
+            ^\.\\"\sJOINSH\s*$|
+            ^\.\\"\sJOIN\s*$/x
+         )
+        {
+        print "Bad control line $line of $file\n";
+        $yield = 1;
+        }
+      }
+
+    # In a text line, a backslash may be followed only by "e", or by "f" and
+    # then one of the letters I, B or P.
+
+    else
+      {
+      if (/\\[^ef]|\\f[^IBP]/)
+        {
+        print "Bad backslash in line $line of $file\n";
+        $yield = 1;
+        }
+      }
+    }
+
+  close(IN);
+  }
+
+exit $yield;
+# End  
--- a/113
+++ b/113
@ -0,0 +1,113 @@
+#! /usr/bin/perl -w
+
+# Script to take the output of nroff -man and remove all the backspacing and
+# the page footers and the screen commands etc so that it is more usefully
+# readable online. In fact, in the latest nroff, intermediate footers don't
+# seem to be generated any more.
+
+$blankcount = 0;     # Number of blank lines seen but not yet output
+$lastwascut = 0;     # Set when the previous action was to cut out a footer
+$firstheader = 1;    # Set until the first header line has been output
+
+# Input on STDIN; output to STDOUT.
+
+while (<STDIN>)
+  {
+  s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
+  s/.\x8//g;         # Remove "char, backspace"
+
+  # Header lines: only the first one encountered is kept. In every case the
+  # line that follows a header (a blank) is read and thrown away.
+
+  if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
+    {
+    if ($firstheader)
+      {
+      print;
+      $lastprinted = $_;
+      $firstheader = 0;
+      $lastwascut = 0;
+      }
+    $_ = <STDIN>;
+    next;
+    }
+
+  # Empty lines are only counted at this point
+
+  if (/^\s*$/)
+    {
+    $lastwascut = 0;
+    $blankcount++;
+    next;
+    }
+
+  # A non-blank line is now in hand. If a page footer has just been cut out
+  # and this line is indented differently from the last one that was output,
+  # one blank line is put back.
+
+  if ($lastwascut && $blankcount < 1 && defined($lastprinted))
+    {
+    my($oldindent) = $lastprinted =~ /^(\s*)/;
+    my($newindent) = $_ =~ /^(\s*)/;
+    $blankcount++ if ($oldindent ne $newindent);
+    }
+
+  # When fewer than 3 blank lines came before this line, it is output after
+  # whatever blanks there were, and remembered. Headings (lines that start
+  # in column one) are forced to have two blank lines before them.
+
+  if ($blankcount < 3)
+    {
+    if (defined($lastprinted) && /^\S/ &&
+        !(/^Last updated/ || /^Copyright/))
+      {
+      $blankcount = 2;
+      }
+    print "\n" x $blankcount;
+    print;
+    $lastprinted = $_;
+    $lastwascut = 0;
+    $blankcount = 0;
+    next;
+    }
+
+  # At least 3 blank lines came before this line. Read the next 3 lines,
+  # cleaning them in the same way as ordinary input. A missing line at the
+  # end of the input counts as an empty one.
+
+  my(@ahead);
+  foreach my $k (0 .. 2)
+    {
+    my($held) = scalar(<STDIN>);
+    $held = "" if !defined $held;
+    $held =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
+    $held =~ s/.\x8//g;         # Remove "char, backspace"
+    push(@ahead, $held);
+    }
+
+  # A chunk of the form <3 blanks><non-blank><3 blanks> is a page footer. It
+  # is cut out by forgetting 3 of the preceding blanks and discarding both
+  # the current line and the 3 lines just read.
+
+  if (!grep { !/^\s*$/ } @ahead)
+    {
+    $blankcount -= 3;
+    $lastwascut = 1;
+    }
+
+  # Otherwise output the saved blanks, the current line, and the 3 lines
+  # just read, which are given a second backspace-removal pass on the way
+  # out. The current line is the one that is remembered.
+
+  else
+    {
+    print "\n" x $blankcount;
+    print;
+    foreach my $held (@ahead)
+      {
+      $held =~ s/.\x8//g;
+      print $held;
+      }
+    $lastprinted = $_;
+    $lastwascut = 0;
+    $blankcount = 0;
+    }
+  }
+
+# End
--- a/35
+++ b/35
@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+# This is a script for removing trailing whitespace from lines in files that
+# are listed on the command line.
+
+# This subroutine does the work for one file.
+
+sub detrail {
+# Remove trailing white space from every line of a file. The file is
+# rewritten in place, but only if at least one line was changed.
+#
+# Argument:  the name of the file
+# Dies:      if the file cannot be read, or cannot be completely rewritten
+
+my($file) = $_[0];
+my($changed) = 0;
+my(@lines);
+
+# The three-argument form of open is used so that the name is only ever
+# treated as a file name, whatever characters it contains.
+
+open(IN, "<", $file) || die "Can't open $file for input: $!";
+@lines = <IN>;
+close(IN);
+
+foreach (@lines)
+  {
+  # Match white space, other than the newline itself, at the end of the
+  # line. The newline is optional so that a final line that lacks one is
+  # handled as well.
+
+  $changed = 1 if s/[^\S\n]+(\n?)\z/$1/;
+  }
+
+if ($changed)
+  {
+  open(OUT, ">", $file) || die "Can't open $file for output: $!";
+  print OUT @lines;
+
+  # An error in buffered output (a full disc, for example) may not show up
+  # until the file is closed. The old contents have gone by then, so a
+  # failure must not pass unnoticed.
+
+  close(OUT) || die "Can't finish writing $file: $!";
+  }
+}
+
+# This is the main program
+
+# Nothing is to be inserted between the lines when a file is rewritten, so
+# set an empty output field separator. Then process each file that is named
+# on the command line.
+
+$, = "";
+foreach my $name (@ARGV)
+  {
+  detrail($name);
+  }
+
+# End
--- a/265
+++ b/265
@ -0,0 +1,265 @@
+#!/bin/sh
+
+# Script to prepare the files for building a PCRE2 release. It does some
+# processing of the documentation, detrails files, and creates pcre2.h.generic
+# and config.h.generic (for use by builders who can't run ./configure).
+
+# You must run this script before running "make dist". If its first argument
+# is "doc", it stops after preparing the documentation. There are no other
+# arguments. The script makes use of the following files:
+
+# 132html     A Perl script that converts a .1 or .3 man page into HTML. It
+#             "knows" the relevant troff constructs that are used in the PCRE2
+#             man pages.
+
+# CheckMan    A Perl script that checks man pages for typos in the mark up.
+
+# CleanTxt    A Perl script that cleans up the output of "nroff -man" by
+#             removing backspaces and other redundant text so as to produce
+#             a readable .txt file.
+
+# Detrail     A Perl script that removes trailing spaces from files.
+
+# doc/index.html.src
+#             A file that is copied as index.html into the doc/html directory
+#             when the HTML documentation is built. It works like this so that
+#             doc/html can be deleted and re-created from scratch.
+
+# README & NON-AUTOTOOLS-BUILD
+#             These files are copied into the doc/html directory, with .txt
+#             extensions so that they can be hyperlinked from the HTML
+#             documentation, because some people just go to the HTML without
+#             looking for text files.
+
+
+# First, sort out the documentation. Remove pcre2demo.3 first because it won't
+# pass the markup check (it is created below, using markup that none of the
+# other pages use).
+
+# Everything that follows deletes and creates files using relative names, so
+# give up at once if the doc directory cannot be entered.
+
+cd doc || exit 1
+echo Processing documentation
+
+/bin/rm -f pcre2demo.3
+
+# Check the remaining man pages
+
+perl ../CheckMan *.1 *.3
+if [ $? != 0 ] ; then exit 1; fi
+
+# Make Text form of the documentation. It needs some mangling to make it
+# tidy for online reading. Concatenate all the .3 stuff, but omit the
+# individual function pages.
+
+cat <<End >pcre2.txt
+-----------------------------------------------------------------------------
+This file contains a concatenation of the PCRE2 man pages, converted to plain
+text format for ease of searching with a text editor, or for use on systems
+that do not have a man page processor. The small individual files that give
+synopses of each function in the library have not been included. Neither has
+the pcre2demo program. There are separate text files for the pcre2grep and
+pcre2test commands.
+-----------------------------------------------------------------------------
+
+
+End
+
+echo "Making pcre2.txt"
+for file in pcre2api pcre2callout pcre2unicode ; do
+
+#for file in pcre pcre16 pcre32 pcrebuild pcrematching \
+#            pcrecompat pcrepattern pcresyntax pcrejit pcrepartial \
+#            pcreprecompile pcreperform pcreposix pcrecpp pcresample \
+#            pcrelimits pcrestack ; do
+ 
+  echo "  Processing $file.3"
+  nroff -c -man $file.3 >$file.rawtxt
+  perl ../CleanTxt <$file.rawtxt >>pcre2.txt
+  /bin/rm $file.rawtxt
+  echo "------------------------------------------------------------------------------" >>pcre2.txt
+  if [ "$file" != "pcre2sample" ] ; then
+    echo " " >>pcre2.txt
+    echo " " >>pcre2.txt
+  fi
+done
+
+# The three commands
+for file in pcre2test ; do
+# for file in pcre2test pcre2grep pcre-config ; do
+  echo Making $file.txt
+  nroff -c -man $file.1 >$file.rawtxt
+  perl ../CleanTxt <$file.rawtxt >$file.txt
+  /bin/rm $file.rawtxt
+done
+
+
+# Make pcre2demo.3 from the pcre2demo.c source file
+
+echo "Making pcre2demo.3"
+perl <<"END" >pcre2demo.3
+  open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
+  open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
+  print OUT ".\\\" Start example.\n" .
+            ".de EX\n" .
+            ".  nr mE \\\\n(.f\n" .
+            ".  nf\n" .
+            ".  nh\n" .
+            ".  ft CW\n" .
+            "..\n" .
+            ".\n" .
+            ".\n" .
+            ".\\\" End example.\n" .
+            ".de EE\n" .
+            ".  ft \\\\n(mE\n" .
+            ".  fi\n" .
+            ".  hy \\\\n(HY\n" .
+            "..\n" .
+            ".\n" .
+            ".EX\n" ;
+  while (<IN>)
+    {
+    s/\\/\\e/g;
+    print OUT;
+    }
+  print OUT ".EE\n";
+  close(IN);
+  close(OUT);
+END
+if [ $? != 0 ] ; then exit 1; fi
+
+
+# Make HTML form of the documentation. The html directory is created if it
+# does not exist, so that it really can be deleted and re-created from
+# scratch, and -f stops rm from complaining when the directory is empty.
+
+echo "Making HTML documentation"
+mkdir -p html || exit 1
+/bin/rm -f html/*
+cp index.html.src html/index.html
+cp ../README html/README.txt
+# cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
+
+# The pages for the commands always have a table of contents. As for the .3
+# pages below, stop at once if a page cannot be converted.
+
+for file in *.1 ; do
+  base=`basename $file .1`
+  echo "  Making $base.html"
+  perl ../132html -toc $base <$file >html/$base.html
+  if [ $? != 0 ] ; then exit 1; fi
+done
+
+# Exclude table of contents for function summaries. It seems that expr
+# forces an anchored regex. Also exclude them for small pages that have
+# only one section.
+
+for file in *.3 ; do
+  base=`basename $file .3`
+  toc=-toc
+  if [ `expr "$base" : '.*_'` -ne 0 ] ; then toc="" ; fi
+  if [ "$base" = "pcre2sample" ]  || \
+     [ "$base" = "pcre2stack" ]   || \
+     [ "$base" = "pcre2compat" ]  || \
+     [ "$base" = "pcre2limits" ]  || \
+     [ "$base" = "pcre2perform" ] || \
+     [ "$base" = "pcre2unicode" ] ; then
+    toc=""
+  fi
+  echo "  Making $base.html"
+  # $toc is deliberately not quoted: when it is empty, no argument is passed
+  perl ../132html $toc $base <$file >html/$base.html
+  if [ $? != 0 ] ; then exit 1; fi
+done
+
+# End of documentation processing; stop if only documentation required.
+
+cd ..
+echo Documentation done
+if [ "$1" = "doc" ] ; then exit; fi
+
+# FIXME pro tem only do docs
+# NOTE: because of this unconditional exit, nothing below this point is
+# executed at present, whatever arguments are given.
+exit
+
+# These files are detrailed; do not detrail the test data because there may be
+# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
+# line endings and the detrail script removes all trailing white space. The
+# configure files are also omitted from the detrailing. We don't bother with
+# those pcre[16|32]_xx files that just define COMPILE_PCRE16 and then #include the
+# common file, because they aren't going to change.
+
+# The list is one string that relies on the shell's word splitting when it is
+# used, unquoted, in the Detrail command below. The names in it are of the
+# pcre/pcre16/pcre32 form, not pcre2.
+
+files="\
+  Makefile.am \
+  Makefile.in \
+  configure.ac \
+  README \
+  LICENCE \
+  COPYING \
+  AUTHORS \
+  NEWS \
+  NON-UNIX-USE \
+  NON-AUTOTOOLS-BUILD \
+  INSTALL \
+  132html \
+  CleanTxt \
+  Detrail \
+  ChangeLog \
+  CMakeLists.txt \
+  RunGrepTest \
+  RunTest \
+  pcre-config.in \
+  libpcre.pc.in \
+  libpcre16.pc.in \
+  libpcre32.pc.in \
+  libpcreposix.pc.in \
+  libpcrecpp.pc.in \
+  config.h.in \
+  pcre_chartables.c.dist \
+  pcredemo.c \
+  pcregrep.c \
+  pcretest.c \
+  dftables.c \
+  pcreposix.c \
+  pcreposix.h \
+  pcre.h.in \
+  pcre_internal.h \
+  pcre_byte_order.c \
+  pcre_compile.c \
+  pcre_config.c \
+  pcre_dfa_exec.c \
+  pcre_exec.c \
+  pcre_fullinfo.c \
+  pcre_get.c \
+  pcre_globals.c \
+  pcre_jit_compile.c \
+  pcre_jit_test.c \
+  pcre_maketables.c \
+  pcre_newline.c \
+  pcre_ord2utf8.c \
+  pcre16_ord2utf16.c \
+  pcre32_ord2utf32.c \
+  pcre_printint.c \
+  pcre_refcount.c \
+  pcre_string_utils.c \
+  pcre_study.c \
+  pcre_tables.c \
+  pcre_valid_utf8.c \
+  pcre_version.c \
+  pcre_xclass.c \
+  pcre16_utf16_utils.c \
+  pcre32_utf32_utils.c \
+  pcre16_valid_utf16.c \
+  pcre32_valid_utf32.c \
+  pcre_scanner.cc \
+  pcre_scanner.h \
+  pcre_scanner_unittest.cc \
+  pcrecpp.cc \
+  pcrecpp.h \
+  pcrecpparg.h.in \
+  pcrecpp_unittest.cc \
+  pcre_stringpiece.cc \
+  pcre_stringpiece.h.in \
+  pcre_stringpiece_unittest.cc \
+  perltest.pl \
+  ucp.h \
+  makevp.bat \
+  pcre.def \
+  libpcre.def \
+  libpcreposix.def"
+
+# As well as the files listed above, everything in doc whose name starts with
+# "p" and everything in doc/html is detrailed.
+
+echo Detrailing
+perl ./Detrail $files doc/p* doc/html/*
+
+echo Done
+
+#End
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -0,0 +1 @@
+This is a placeholder README file for a work in progress.
--- a/doc/html/index.html
+++ b/doc/html/index.html
@ -0,0 +1,177 @@
+<html>
+<!-- This is a manually maintained file that is the root of the HTML version of 
+     the PCRE2 documentation. When the HTML documents are built from the man 
+     page versions, the entire doc/html directory is emptied, this file is then 
+     copied into doc/html/index.html, and the remaining files therein are 
+     created by the 132html script.
+-->      
+<head>
+<title>PCRE2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
+<p>
+The HTML documentation for PCRE2 consists of a number of pages that are listed
+below in alphabetical order. If you are new to PCRE2, please read the first one
+first.
+</p>
+
+<table>
+<tr><td><a href="pcre2.html">pcre2</a></td>
+    <td>&nbsp;&nbsp;Introductory page</td></tr>
+
+<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
+    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
+
+<tr><td><a href="pcre2api.html">pcre2api</a></td>
+    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
+
+<tr><td><a href="pcre2build.html">pcre2build</a></td>
+    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
+
+<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
+    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
+
+<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
+    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
+
+<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
+    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
+
+<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
+
+<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
+<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
+    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
+
+<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
+    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
+
+<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
+    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
+
+<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
+    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
+
+<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
+    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
+
+<tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
+    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
+
+<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
+    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
+
+<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
+    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
+
+<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
+    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
+
+<tr><td><a href="pcre2test.html">pcre2test</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
+
+<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
+</table>
+
+<p>
+There are also individual pages that summarize the interface for each function
+in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
+functions.
+</p>
+
+<table>    
+
+<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
+    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
+
+<tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
+
+<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
+    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
+
+<tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
+    <td>&nbsp;&nbsp;Free study data</td></tr>
+
+<tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
+    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
+
+<tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
+    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
+
+<tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
+    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
+
+<tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
+    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
+
+<tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
+    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
+
+<tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
+    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
+
+<tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
+    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
+    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
+    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
+
+<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
+    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
+    
+<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
+    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_study.html">pcre2_study</a></td>
+    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_version.html">pcre2_version</a></td>
+    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
+</table>
+
+</body>
+</html>
+
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
--- a/doc/html/pcre2callout.html
+++ b/doc/html/pcre2callout.html
@ -0,0 +1,270 @@
+<html>
+<head>
+<title>pcre2callout specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2callout man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
+<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
+<li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
+<li><a name="TOC6" href="#SEC6">AUTHOR</a>
+<li><a name="TOC7" href="#SEC7">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int (*pcre2_callout)(pcre2_callout_block *);</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+PCRE2 provides a feature called "callout", which is a means of temporarily
+passing control to the caller of PCRE2 in the middle of pattern matching. The
+caller of PCRE2 provides an external function by putting its entry point in
+a match context (see <b>pcre2_set_callout()</b> in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation).
+</P>
+<P>
+Within a regular expression, (?C) indicates the points at which the external
+function is to be called. Different callout points can be identified by putting
+a number less than 256 after the letter C. The default value is zero.
+For example, this pattern has two callout points:
+<pre>
+  (?C1)abc(?C2)def
+</pre>
+If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
+automatically inserts callouts, all with number 255, before each item in the
+pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
+<pre>
+  A(\d{2}|--)
+</pre>
+it is processed as if it were
+<br>
+<br>
+(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+<br>
+<br>
+Notice that there is a callout before and after each parenthesis and
+alternation bar. If the pattern contains a conditional group whose condition is
+an assertion, an automatic callout is inserted immediately before the
+condition. Such a callout may also be inserted explicitly, for example:
+<pre>
+  (?(?C9)(?=a)ab|de)
+</pre>
+This applies only to assertion conditions (because they are themselves
+independent groups).
+</P>
+<P>
+Automatic callouts can be used for tracking the progress of pattern matching.
+The
+<a href="pcre2test.html"><b>pcre2test</b></a>
+program has a pattern qualifier (/auto_callout) that sets automatic callouts;
+when it is used, the output indicates how the pattern is being matched. This is
+useful information when you are trying to optimize the performance of a
+particular pattern.
+</P>
+<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
+<P>
+You should be aware that, because of optimizations in the way PCRE2 compiles
+and matches patterns, callouts sometimes do not happen exactly as you might
+expect.
+</P>
+<P>
+At compile time, PCRE2 "auto-possessifies" repeated items when it knows that
+what follows cannot be part of the repeat. For example, a+[bc] is compiled as
+if it were a++[bc]. The <b>pcre2test</b> output when this pattern is anchored
+and then applied with automatic callouts to the string "aaaa" is:
+<pre>
+  ---&#62;aaaa
+   +0 ^        ^
+   +1 ^        a+
+   +3 ^   ^    [bc]
+  No match
+</pre>
+This indicates that when matching [bc] fails, there is no backtracking into a+
+and therefore the callouts that would be taken for the backtracks do not occur.
+You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS
+to <b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
+this is done in <b>pcre2test</b> (using the /no_auto_possess qualifier), the
+output changes to this:
+<pre>
+  ---&#62;aaaa
+   +0 ^        ^
+   +1 ^        a+
+   +3 ^   ^    [bc]
+   +3 ^  ^     [bc]
+   +3 ^ ^      [bc]
+   +3 ^^       [bc]
+  No match
+</pre>
+This time, when matching [bc] fails, the matcher backtracks into a+ and tries
+again, repeatedly, until a+ itself fails.
+</P>
+<P>
+Other optimizations that provide fast "no match" results also affect callouts.
+For example, if the pattern is
+<pre>
+  ab(?C4)cd
+</pre>
+PCRE2 knows that any matching string must contain the letter "d". If the
+subject string is "abyz", the lack of "d" means that matching doesn't ever
+start, and the callout is never reached. However, with "abyd", though the
+result is still no match, the callout is obeyed.
+</P>
+<P>
+PCRE2 also knows the minimum length of a matching string, and will immediately
+give a "no match" return without actually running a match if the subject is not
+long enough, or, for unanchored patterns, if it has been scanned far enough.
+</P>
+<P>
+You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
+option to the matching function, or by starting the pattern with
+(*NO_START_OPT). This slows down the matching process, but does ensure that
+callouts such as the example above are obeyed.
+</P>
+<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
+<P>
+During matching, when PCRE2 reaches a callout point, the external function that
+is set in the match context is called (if it is set). This applies to both
+normal and DFA matching. The only argument to the callout function is a pointer
+to a <b>pcre2_callout_block</b> structure. This structure contains the following fields:
+<pre>
+  uint32_t      <i>version</i>;
+  uint32_t      <i>callout_number</i>;
+  uint32_t      <i>capture_top</i>;
+  uint32_t      <i>capture_last</i>;
+  void         *<i>callout_data</i>;
+  PCRE2_SIZE   *<i>offset_vector</i>;
+  PCRE2_SPTR    <i>mark</i>;
+  PCRE2_SPTR    <i>subject</i>;
+  PCRE2_SIZE    <i>subject_length</i>;
+  PCRE2_SIZE    <i>start_match</i>;
+  PCRE2_SIZE    <i>current_position</i>;
+  PCRE2_SIZE    <i>pattern_position</i>;
+  PCRE2_SIZE    <i>next_item_length</i>;
+</pre>
+The <i>version</i> field contains the version number of the block format. The
+current version is 0. The version number will change in future if additional
+fields are added, but the intention is never to remove any of the existing
+fields.
+</P>
+<P>
+The <i>callout_number</i> field contains the number of the callout, as compiled
+into the pattern (that is, the number after ?C for manual callouts, and 255 for
+automatically generated callouts).
+</P>
+<P>
+The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
+(the "ovector") that was passed to the matching function in the match data
+block. When <b>pcre2_match()</b> is used, the contents can be inspected, in
+order to extract substrings that have been matched so far, in the same way as
+for extracting substrings after a match has completed. For the DFA matching
+function, this field is not useful.
+</P>
+<P>
+The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
+that were passed to the matching function.
+</P>
+<P>
+The <i>start_match</i> field normally contains the offset within the subject at
+which the current match attempt started. However, if the escape sequence \K
+has been encountered, this value is changed to reflect the modified starting
+point. If the pattern is not anchored, the callout function may be called
+several times from the same point in the pattern for different starting points
+in the subject.
+</P>
+<P>
+The <i>current_position</i> field contains the offset within the subject of the
+current match pointer.
+</P>
+<P>
+When <b>pcre2_match()</b> is used, the <i>capture_top</i> field contains one
+more than the number of the highest numbered captured substring so far. If no
+substrings have been captured, the value of <i>capture_top</i> is one. This is
+always the case when the DFA functions are used, because they do not support
+captured substrings.
+</P>
+<P>
+The <i>capture_last</i> field contains the number of the most recently captured
+substring. However, when a recursion exits, the value reverts to what it was
+outside the recursion, as do the values of all captured substrings. If no
+substrings have been captured, the value of <i>capture_last</i> is 0. This is
+always the case for the DFA matching functions.
+</P>
+<P>
+The <i>callout_data</i> field contains a value that is passed to a matching
+function specifically so that it can be passed back in callouts. It is set in
+the match context when the callout is set up by calling
+<b>pcre2_set_callout()</b> (see the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation).
+</P>
+<P>
+The <i>pattern_position</i> field contains the offset to the next item to be
+matched in the pattern string.
+</P>
+<P>
+The <i>next_item_length</i> field contains the length of the next item to be
+matched in the pattern string. When the callout immediately precedes an
+alternation bar, a closing parenthesis, or the end of the pattern, the length
+is zero. When the callout precedes an opening parenthesis, the length is that
+of the entire subpattern.
+</P>
+<P>
+The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
+help in distinguishing between different automatic callouts, which all have the
+same callout number. However, they are set for all callouts.
+</P>
+<P>
+In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
+the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
+(*THEN) item in the match, or NULL if no such items have been passed. Instances
+of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
+callouts from the DFA matching function this field always contains NULL.
+</P>
+<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
+<P>
+The external callout function returns an integer to PCRE2. If the value is
+zero, matching proceeds as normal. If the value is greater than zero, matching
+fails at the current point, but the testing of other matching possibilities
+goes ahead, just as if a lookahead assertion had failed. If the value is less
+than zero, the match is abandoned, and the matching function returns the
+negative value.
+</P>
+<P>
+Negative values should normally be chosen from the set of PCRE2_ERROR_xxx
+values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
+failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
+functions; it will never be used by PCRE2 itself.
+</P>
+<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC7" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 19 October 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2demo.html
+++ b/doc/html/pcre2demo.html
@ -0,0 +1,443 @@
+<html>
+<head>
+<title>pcre2demo specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2demo man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+</ul>
+<PRE>
+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+calling the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit 
+width. This demonstration program uses the 8-bit library.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* This macro must be defined before including pcre2.h. For a program that uses 
+only one code unit width, it makes it possible to use generic function names 
+such as pcre2_compile(). */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include &lt;stdio.h&gt;
+#include &lt;string.h&gt;
+#include &lt;pcre2.h&gt;
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+int main(int argc, char **argv)
+{
+pcre2_code *re;
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR name_table;
+
+int crlf_is_newline;
+int errornumber;
+int find_all;
+int i;
+int namecount;
+int name_entry_size;
+int rc;
+int utf8;
+
+uint32_t option_bits;
+uint32_t newline;
+
+PCRE2_SIZE erroroffset;
+PCRE2_SIZE *ovector;
+
+size_t subject_length;
+pcre2_match_data *match_data;
+
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present. Apart from that, there must be exactly two *
+* arguments.                                                              *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i &lt; argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+    else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Two arguments required: a regex and a subject string\n");
+  return 1;
+  }
+
+/* As pattern and subject are char arguments, they can be straightforwardly
+cast to PCRE2_SPTR as we are working in 8-bit code units. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,              /* the pattern */
+  -1,                   /* indicates pattern is zero-terminated */ 
+  0,                    /* default options */
+  &amp;errornumber,         /* for error number */
+  &amp;erroroffset,         /* for error offset */
+  NULL);                /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256]; 
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, 
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result.        *
+*************************************************************************/
+
+/* Using this function ensures that the block is exactly the right size for
+the number of capturing parentheses in the pattern. */
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc &lt; 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\n", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /* data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets are
+stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used 
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\n");
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i &lt; rc; i++)
+  {
+  PCRE2_SPTR substring_start = subject + ovector[2*i];
+  size_t substring_length = ovector[2*i+1] - ovector[2*i];
+  printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &amp;namecount);          /* where to put the answer */
+
+if (namecount &lt;= 0) printf("No named substrings\n"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\n");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &amp;name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &amp;name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two 
+  bytes, most significant first. */
+
+  tabptr = name_table;
+  for (i = 0; i &lt; namecount; i++)
+    {
+    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
+    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+    tabptr += name_entry_size;
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &amp;option_bits);
+utf8 = (option_bits &amp; PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF; 
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                    /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
+        start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
+        subject[start_offset] == '\r' &amp;&amp;
+        subject[start_offset + 1] == '\n')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] &lt; subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc &lt; 0)
+    {
+    printf("Matching error %d\n", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\n");
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. */
+
+  for (i = 0; i &lt; rc; i++)
+    {
+    PCRE2_SPTR substring_start = subject + ovector[2*i];
+    size_t substring_length = ovector[2*i+1] - ovector[2*i];
+    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+    }
+
+  if (namecount &lt;= 0) printf("No named substrings\n"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\n");
+    for (i = 0; i &lt; namecount; i++)
+      {
+      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
+      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+      tabptr += name_entry_size;
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\n");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -0,0 +1,270 @@
+<html>
+<head>
+<title>pcre2unicode specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2unicode man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+UNICODE AND UTF SUPPORT
+</b><br>
+<P>
+When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
+character properties and can process text strings in UTF-8, UTF-16, or UTF-32
+format (depending on the code unit width). By default, PCRE2 assumes that one
+code unit is one character. To process a pattern as a UTF string, where a
+character may require more than one code unit, you must call
+<a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
+with the PCRE2_UTF option flag, or the pattern must start with the sequence
+(*UTF). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF strings instead of
+strings of individual one-code-unit characters.
+</P>
+<P>
+If you build PCRE2 with Unicode support, the library will be bigger, but the
+additional run time overhead is limited to testing the PCRE2_UTF flag
+occasionally, so should not be very much.
+</P>
+<br><b>
+UNICODE PROPERTY SUPPORT
+</b><br>
+<P>
+When PCRE2 is built with Unicode support, the escape sequences \p{..},
+\P{..}, and \X can be used. The Unicode properties that can be tested are
+limited to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
+the derived properties Any and L&amp;. Full lists are given in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+and
+<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
+documentation. Only the short names for properties are supported. For example,
+\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
+Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
+compatibility with Perl 5.6. PCRE2 does not support this.
+</P>
+<br><b>
+WIDE CHARACTERS AND UTF MODES
+</b><br>
+<P>
+Codepoints less than 256 can be specified in patterns by either braced or
+unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
+values have to use braced sequences. Unbraced octal code points up to \777 are
+also recognized; larger ones can be coded using \o{...}.
+</P>
+<P>
+In UTF modes, repeat quantifiers apply to complete UTF characters, not to
+individual code units.
+</P>
+<P>
+In UTF modes, the dot metacharacter matches one UTF character instead of a
+single code unit.
+</P>
+<P>
+The escape sequence \C can be used to match a single code unit, in a UTF mode, 
+but its use can lead to some strange effects because it breaks up multi-unit
+characters (see the description of \C in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation). The use of \C is not supported in the alternative matching
+function <b>pcre2_dfa_match()</b>, nor is it supported in UTF mode by the JIT
+optimization. If JIT optimization is requested for a UTF pattern that contains
+\C, it will not succeed, and so the matching will be carried out by the normal
+interpretive function.
+</P>
+<P>
+The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
+characters of any code value, but, by default, the characters that PCRE2
+recognizes as digits, spaces, or word characters remain the same set as in
+non-UTF mode, all with code points less than 256. This remains true even when
+PCRE2 is built to include Unicode support, because to do otherwise would slow
+down matching in many common cases. Note that this also applies to \b
+and \B, because they are defined in terms of \w and \W. If you want
+to test for a wider sense of, say, "digit", you can use explicit Unicode
+property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option,
+the way that the character escapes work is changed so that Unicode properties
+are used to determine which characters match. There are more details in the
+section on
+<a href="pcre2pattern.html#genericchartypes">generic character types</a>
+in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation.
+</P>
+<P>
+Similarly, characters that match the POSIX named character classes are all
+low-valued characters, unless the PCRE2_UCP option is set.
+</P>
+<P>
+However, the special horizontal and vertical white space matching escapes (\h,
+\H, \v, and \V) do match all the appropriate Unicode characters, whether or
+not PCRE2_UCP is set.
+</P>
+<P>
+Case-insensitive matching in UTF mode makes use of Unicode properties. A few
+Unicode characters such as Greek sigma have more than two codepoints that are
+case-equivalent, and these are treated as such.
+</P>
+<br><b>
+VALIDITY OF UTF STRINGS
+</b><br>
+<P>
+When the PCRE2_UTF option is set, the strings passed as patterns and subjects
+are (by default) checked for validity on entry to the relevant functions. 
+If an invalid UTF string is passed, an error return is given. 
+</P>
+<P>
+UTF-16 and UTF-32 strings can indicate their endianness by a special code known
+as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
+strings to be in host byte order.
+</P>
+<P>
+The entire string is checked before any other processing takes place. In
+addition to checking the format of the string, there is a check to ensure that
+all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
+The so-called "non-character" code points are not excluded because Unicode
+corrigendum #9 makes it clear that they should not be.
+</P>
+<P>
+Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
+where they are used in pairs to encode code points with values greater than
+0xFFFF. The code points that are encoded by UTF-16 pairs are available
+independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
+surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
+UTF-32.)
+</P>
+<P>
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
+assumes that the pattern or subject it is given (respectively) contains only
+valid UTF code unit sequences.
+</P>
+<P>
+Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
+the pattern; it does not also apply to subject strings. If you want to disable
+the check for a subject string you must pass this option to <b>pcre2_match()</b>
+or <b>pcre2_dfa_match()</b>.
+</P>
+<P>
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely.
+<a name="utf8strings"></a></P>
+<br><b>
+Errors in UTF-8 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-8 strings:
+<pre>
+  PCRE2_ERROR_UTF8_ERR1
+  PCRE2_ERROR_UTF8_ERR2
+  PCRE2_ERROR_UTF8_ERR3
+  PCRE2_ERROR_UTF8_ERR4
+  PCRE2_ERROR_UTF8_ERR5
+</pre>
+The string ends with a truncated UTF-8 character; the code specifies how many
+bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
+no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
+allows for up to 6 bytes, and this is checked first; hence the possibility of
+4 or 5 missing bytes.
+<pre>
+  PCRE2_ERROR_UTF8_ERR6
+  PCRE2_ERROR_UTF8_ERR7
+  PCRE2_ERROR_UTF8_ERR8
+  PCRE2_ERROR_UTF8_ERR9
+  PCRE2_ERROR_UTF8_ERR10
+</pre>
+The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
+character do not have the binary value 0b10 (that is, either the most
+significant bit is 0, or the next bit is 1).
+<pre>
+  PCRE2_ERROR_UTF8_ERR11
+  PCRE2_ERROR_UTF8_ERR12
+</pre>
+A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
+these code points are excluded by RFC 3629.
+<pre>
+  PCRE2_ERROR_UTF8_ERR13
+</pre>
+A 4-byte character has a value greater than 0x10ffff; these code points are
+excluded by RFC 3629.
+<pre>
+  PCRE2_ERROR_UTF8_ERR14
+</pre>
+A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
+code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
+from UTF-8.
+<pre>
+  PCRE2_ERROR_UTF8_ERR15
+  PCRE2_ERROR_UTF8_ERR16
+  PCRE2_ERROR_UTF8_ERR17
+  PCRE2_ERROR_UTF8_ERR18
+  PCRE2_ERROR_UTF8_ERR19
+</pre>
+A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
+value that can be represented by fewer bytes, which is invalid. For example,
+the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
+one byte.
+<pre>
+  PCRE2_ERROR_UTF8_ERR20
+</pre>
+The two most significant bits of the first byte of a character have the binary
+value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
+byte can only validly occur as the second or subsequent byte of a multi-byte
+character.
+<pre>
+  PCRE2_ERROR_UTF8_ERR21
+</pre>
+The first byte of a character has the value 0xfe or 0xff. These values can
+never occur in a valid UTF-8 string.
+<a name="utf16strings"></a></P>
+<br><b>
+Errors in UTF-16 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-16 strings:
+<pre>
+  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+
+<a name="utf32strings"></a></PRE>
+</P>
+<br><b>
+Errors in UTF-32 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-32 strings:
+<pre>
+  PCRE2_ERROR_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
+  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+
+</PRE>
+</P>
+<br><b>
+AUTHOR
+</b><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><b>
+REVISION
+</b><br>
+<P>
+Last updated: 16 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/index.html.src
+++ b/doc/index.html.src
@ -0,0 +1,177 @@
+<html>
+<!-- This is a manually maintained file that is the root of the HTML version of 
+     the PCRE2 documentation. When the HTML documents are built from the man 
+     page versions, the entire doc/html directory is emptied, this file is then 
+     copied into doc/html/index.html, and the remaining files therein are 
+     created by the 132html script.
+-->      
+<head>
+<title>PCRE2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
+<p>
+The HTML documentation for PCRE2 consists of a number of pages that are listed
+below in alphabetical order. If you are new to PCRE2, please read the first one
+first.
+</p>
+
+<table>
+<tr><td><a href="pcre2.html">pcre2</a></td>
+    <td>&nbsp;&nbsp;Introductory page</td></tr>
+
+<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
+    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
+
+<tr><td><a href="pcre2api.html">pcre2api</a></td>
+    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
+
+<tr><td><a href="pcre2build.html">pcre2build</a></td>
+    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
+
+<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
+    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
+
+<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
+    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
+
+<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
+    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
+
+<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
+
+<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
+<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
+    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
+
+<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
+    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
+
+<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
+    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
+
+<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
+    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
+
+<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
+    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
+
+<tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
+    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
+
+<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
+    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
+
+<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
+    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
+
+<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
+    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
+
+<tr><td><a href="pcre2test.html">pcre2test</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
+
+<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
+</table>
+
+<p>
+There are also individual pages that summarize the interface for each function
+in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
+functions.
+</p>
+
+<table>    
+
+<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
+    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
+
+<tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
+
+<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
+    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
+
+<tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
+    <td>&nbsp;&nbsp;Free study data</td></tr>
+
+<tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
+    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
+
+<tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
+    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
+
+<tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
+    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
+
+<tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
+    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
+
+<tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
+    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
+
+<tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
+    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
+
+<tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
+    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
+    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
+    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
+
+<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
+    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
+    
+<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
+    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_study.html">pcre2_study</a></td>
+    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_version.html">pcre2_version</a></td>
+    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
+</table>
+
+</html>
+
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -214,7 +214,7 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_pattern_info(const pcre2 *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
 .sp
 .B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP, PCRE2_SIZE \fIlength\fP);
-.sp
+.fi
 .
 .
 .SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
--- a/doc/pcre2demo.3
+++ b/doc/pcre2demo.3
@ -0,0 +1,441 @@
+.\" Start example.
+.de EX
+.  nr mE \\n(.f
+.  nf
+.  nh
+.  ft CW
+..
+.
+.
+.\" End example.
+.de EE
+.  ft \\n(mE
+.  fi
+.  hy \\n(HY
+..
+.
+.EX
+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+calling the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit 
+width. This demonstration program uses the 8-bit library.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* This macro must be defined before including pcre2.h. For a program that uses 
+only one code unit width, it makes it possible to use generic function names 
+such as pcre2_compile(). */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <stdio.h>
+#include <string.h>
+#include <pcre2.h>
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+int main(int argc, char **argv)
+{
+pcre2_code *re;
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR name_table;
+
+int crlf_is_newline;
+int errornumber;
+int find_all;
+int i;
+int namecount;
+int name_entry_size;
+int rc;
+int utf8;
+
+uint32_t option_bits;
+uint32_t newline;
+
+PCRE2_SIZE erroroffset;
+PCRE2_SIZE *ovector;
+
+size_t subject_length;
+pcre2_match_data *match_data;
+
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present. Apart from that, there must be exactly two *
+* arguments.                                                              *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i < argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+    else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Two arguments required: a regex and a subject string\en");
+  return 1;
+  }
+
+/* As pattern and subject are char arguments, they can be straightforwardly
+cast to PCRE2_SPTR as we are working in 8-bit code units. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,              /* the pattern */
+  -1,                   /* indicates pattern is zero-terminated */ 
+  0,                    /* default options */
+  &errornumber,         /* for error number */
+  &erroroffset,         /* for error offset */
+  NULL);                /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256]; 
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, 
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE again, in order to do a     *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result.        *
+*************************************************************************/
+
+/* Using this function ensures that the block is exactly the right size for
+the number of capturing parentheses in the pattern. */
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc < 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\en"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\en", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /* data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets are
+stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used 
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\en");
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i < rc; i++)
+  {
+  PCRE2_SPTR substring_start = subject + ovector[2*i];
+  size_t substring_length = ovector[2*i+1] - ovector[2*i];
+  printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &namecount);          /* where to put the answer */
+
+if (namecount <= 0) printf("No named substrings\en"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\en");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two 
+  bytes, most significant first. */
+
+  tabptr = name_table;
+  for (i = 0; i < namecount; i++)
+    {
+    int n = (tabptr[0] << 8) | tabptr[1];
+    printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
+      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+    tabptr += name_entry_size;
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
+utf8 = (option_bits & PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF; 
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                    /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &&                      /* If CRLF is newline & */
+        start_offset < subject_length - 1 &&    /* we are at CRLF, */
+        subject[start_offset] == '\er' &&
+        subject[start_offset + 1] == '\en')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] < subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc < 0)
+    {
+    printf("Matching error %d\en", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\en");
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. */
+
+  for (i = 0; i < rc; i++)
+    {
+    PCRE2_SPTR substring_start = subject + ovector[2*i];
+    size_t substring_length = ovector[2*i+1] - ovector[2*i];
+    printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
+    }
+
+  if (namecount <= 0) printf("No named substrings\en"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\en");
+    for (i = 0; i < namecount; i++)
+      {
+      int n = (tabptr[0] << 8) | tabptr[1];
+      printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
+        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+      tabptr += name_entry_size;
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\en");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+.EE
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -154,7 +154,7 @@ Do not output the version number of \fBpcre2test\fP at the start of execution.
 \fB-S\fP \fIsize\fP
 On Unix-like systems, set the size of the run-time stack to \fIsize\fP
 megabytes.
-.TP10
+.TP 10
 \fB-subject\fP \fImodifier-list\fP
 Behave as if each subject line contains the given modifiers.
 .TP 10
@ -366,7 +366,7 @@ include a closing square bracket in the characters, code it as \ex5D.
 A backslash followed by an equals sign marke the end of the subject string and
 the start of a modifier list. For example:
 .sp
-  abc\=notbol,notempty
+  abc\e=notbol,notempty
 .sp
 A backslash followed by any other non-alphanumeric character just escapes that
 character. A backslash followed by anything else causes an error. However, if
@ -746,7 +746,7 @@ the actual match are indicated in the output by '<' or '>' characters
 underneath them. Here is an example:
 .sp
  /(?<=pqr)abc(?=xyz)/
-      123pqrabcxyz456\=allusedtext
+      123pqrabcxyz456\e=allusedtext
   0: pqrabcxyz
      <<<   >>>
 .sp
@ -789,7 +789,7 @@ The \fBcopy\fP and \fBget\fP modifiers can be used to test the
 They can be given more than once, and each can specify a group name or number,
 for example:
 .sp
-   abcd\=copy=1,copy=3,get=G1
+   abcd\e=copy=1,copy=3,get=G1
 .sp
 If the \fB#subject\fP command is used to set default copy and get lists, these
 can be unset by specifying a negative number for numbered groups and an empty
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
--- a/src/pcre2demo.c
+++ b/src/pcre2demo.c
@ -420,4 +420,4 @@ pcre2_code_free(re);
 return 0;
 }

-/* End of pcredemo.c */
+/* End of pcre2demo.c */
				`@ -0,0 +1 @@`
				`This is a placeholder README file for a work in progress.`