From e15b64ef0365862f6ab98ad1643d58cd3a7e2c35 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel" <Philip.Hazel@gmail.com>
Date: Sun, 28 Sep 2014 17:39:28 +0000
Subject: [PATCH] More draft documentation.

---
 Makefile.am                      |  46 +-
 PrepareRelease                   |  16 +-
 doc/html/NON-AUTOTOOLS-BUILD.txt | 402 +++++++++++++++
 doc/html/README.txt              | 833 ++++++++++++++++++++++++++++++-
 doc/html/pcre2-config.html       | 102 ++++
 doc/html/pcre2.html              | 182 +++++++
 doc/html/pcre2grep.html          | 759 ++++++++++++++++++++++++++++
 doc/pcre2-config.1               |  86 ++++
 doc/pcre2-config.txt             |  81 +++
 doc/pcre2.3                      | 180 +++++++
 doc/pcre2build.3                 | 490 ++++++++++++++++++
 doc/pcre2compat.3                | 190 +++++++
 doc/pcre2grep.1                  | 683 +++++++++++++++++++++++++
 doc/pcre2grep.txt                | 741 +++++++++++++++++++++++++++
 src/pcre2grep.c                  |   2 +-
 src/pcre2test.c                  |   4 +-
 16 files changed, 4754 insertions(+), 43 deletions(-)
 create mode 100644 doc/html/NON-AUTOTOOLS-BUILD.txt
 create mode 100644 doc/html/pcre2-config.html
 create mode 100644 doc/html/pcre2.html
 create mode 100644 doc/html/pcre2grep.html
 create mode 100644 doc/pcre2-config.1
 create mode 100644 doc/pcre2-config.txt
 create mode 100644 doc/pcre2.3
 create mode 100644 doc/pcre2build.3
 create mode 100644 doc/pcre2compat.3
 create mode 100644 doc/pcre2grep.1
 create mode 100644 doc/pcre2grep.txt

diff --git a/Makefile.am b/Makefile.am
index 7df1e86..366b88c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,29 +6,31 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
 
 ## Specify the documentation files that are distributed.
 
-# FIXME
 dist_doc_DATA = \
   AUTHORS \
   COPYING \
   ChangeLog \
   LICENCE \
-  README
-
-#  doc/pcre.txt \
-#  doc/pcre-config.txt \
-#  doc/pcregrep.txt \
-#  doc/pcretest.txt \
-#  NEWS
+  NEWS \
+  README \
+  doc/pcre2.txt \
+  doc/pcre2-config.txt \
+  doc/pcre2grep.txt \
+  doc/pcre2test.txt
 
 # FIXME
-#dist_html_DATA = \
-#  doc/html/NON-AUTOTOOLS-BUILD.txt \
-#  doc/html/README.txt \
-#  doc/html/index.html \
-#  doc/html/pcre-config.html \
+dist_html_DATA = \
+  doc/html/NON-AUTOTOOLS-BUILD.txt \
+  doc/html/README.txt \
+  doc/html/index.html \
+  doc/html/pcre2-config.html \
+  doc/html/pcre2api.html \
+  doc/html/pcre2callout.html \
+  doc/html/pcre2demo.html \
+  doc/html/pcre2test.html \
+  doc/html/pcre2unicode.html
+
 #  doc/html/pcre.html \
-#  doc/html/pcre16.html \
-#  doc/html/pcre32.html \
 #  doc/html/pcre_assign_jit_stack.html \
 #  doc/html/pcre_compile.html \
 #  doc/html/pcre_compile2.html \
@@ -56,11 +58,8 @@ dist_doc_DATA = \
 #  doc/html/pcre_utf16_to_host_byte_order.html \
 #  doc/html/pcre_utf32_to_host_byte_order.html \
 #  doc/html/pcre_version.html \
-#  doc/html/pcreapi.html \
 #  doc/html/pcrebuild.html \
-#  doc/html/pcrecallout.html \
 #  doc/html/pcrecompat.html \
-#  doc/html/pcredemo.html \
 #  doc/html/pcregrep.html \
 #  doc/html/pcrejit.html \
 #  doc/html/pcrelimits.html \
@@ -72,18 +71,16 @@ dist_doc_DATA = \
 #  doc/html/pcreprecompile.html \
 #  doc/html/pcresample.html \
 #  doc/html/pcrestack.html \
-#  doc/html/pcresyntax.html \
-#  doc/html/pcretest.html \
-#  doc/html/pcreunicode.html
+#  doc/html/pcresyntax.html
 
 # FIXME
 dist_man_MANS = \
+  doc/pcre2-config.1 \
   doc/pcre2api.3 \
   doc/pcre2callout.3 \
   doc/pcre2test.1 \
   doc/pcre2unicode.3
 
-#  doc/pcre2-config.1 \
 #  doc/pcre2.3 \
 #  doc/pcre2-16.3 \
 #  doc/pcre2-32.3 \
@@ -168,7 +165,6 @@ EXTRA_DIST += \
 
 EXTRA_DIST += \
   doc/perltest.txt \
-  NON-UNIX-USE \
   NON-AUTOTOOLS-BUILD \
   HACKING
 
@@ -719,9 +715,9 @@ else
 coverage:
 	@echo "Configuring with --enable-coverage is required to generate code coverage report."
 
-DISTCLEANFILES += src/*.gcda src/*.gcno 
+DISTCLEANFILES += src/*.gcda src/*.gcno
 
-distclean-local: 
+distclean-local:
 	rm -rf $(PACKAGE)-$(VERSION)-coverage*
 
 endif # WITH_GCOV
diff --git a/PrepareRelease b/PrepareRelease
index c92d7f9..2b58a47 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -83,8 +83,7 @@ for file in pcre2api pcre2callout pcre2unicode ; do
 done
 
 # The three commands
-for file in pcre2test ; do
-# for file in pcre2test pcre2grep pcre-config ; do
+for file in pcre2test pcre2grep pcre2-config ; do
   echo Making $file.txt
   nroff -c -man $file.1 >$file.rawtxt
   perl ../CleanTxt <$file.rawtxt >$file.txt
@@ -133,7 +132,7 @@ echo "Making HTML documentation"
 /bin/rm html/*
 cp index.html.src html/index.html
 cp ../README html/README.txt
-# cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
+cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
 
 for file in *.1 ; do
   base=`basename $file .1`
@@ -187,7 +186,6 @@ files="\
   COPYING \
   AUTHORS \
   NEWS \
-  NON-UNIX-USE \
   NON-AUTOTOOLS-BUILD \
   INSTALL \
   132html \
@@ -240,16 +238,6 @@ files="\
   pcre32_utf32_utils.c \
   pcre16_valid_utf16.c \
   pcre32_valid_utf32.c \
-  pcre_scanner.cc \
-  pcre_scanner.h \
-  pcre_scanner_unittest.cc \
-  pcrecpp.cc \
-  pcrecpp.h \
-  pcrecpparg.h.in \
-  pcrecpp_unittest.cc \
-  pcre_stringpiece.cc \
-  pcre_stringpiece.h.in \
-  pcre_stringpiece_unittest.cc \
   perltest.pl \
   ucp.h \
   makevp.bat \
diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt
new file mode 100644
index 0000000..6f36fe6
--- /dev/null
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -0,0 +1,402 @@
+Building PCRE2 without using autotools
+--------------------------------------
+
+This document has been converted from the PCRE1 document, but is not yet 
+complete. I have removed a number of quite old sections about building in 
+various environments, as they applied only to PCRE1 and are probably out of 
+date.
+
+
+This document contains the following sections:
+
+  General
+  Generic instructions for the PCRE2 C library
+  Building for virtual Pascal
+  Stack size in Windows environments
+  Linking programs in Windows environments
+  Calling conventions in Windows environments
+  Comments about Win32 builds
+  Building PCRE2 on Windows with CMake
+  Testing with RunTest.bat
+  Building PCRE2 on native z/OS and z/VM
+
+
+GENERAL
+
+I (Philip Hazel) have no experience of Windows or VMS systems and how their
+libraries work. The items in the PCRE2 distribution and Makefile that relate to
+anything other than Linux systems are untested by me.
+
+The basic PCRE2 library consists entirely of code written in Standard C, and so
+should compile successfully on any system that has a Standard C compiler and
+library.
+
+The PCRE2 distribution includes a "configure" file for use by the
+configure/make (autotools) build system, as found in many Unix-like
+environments. The README file contains information about the options for
+"configure".
+
+There is also support for CMake, which some users prefer, especially in Windows
+environments, though it can also be run in Unix-like environments. See the
+section entitled "Building PCRE2 on Windows with CMake" below.
+
+Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
+under the names src/config.h.generic and src/pcre2.h.generic. These are
+provided for those who build PCRE2 without using "configure" or CMake. If you
+use "configure" or CMake, the .generic versions are not used.
+
+
+GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
+
+The following are generic instructions for building the PCRE2 C library "by
+hand". If you are going to use CMake, this section does not apply to you; you
+can skip ahead to the CMake section.
+
+ (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
+     macro settings that it contains to whatever is appropriate for your
+     environment. In particular, you can alter the definition of the NEWLINE
+     macro to specify what character(s) you want to be interpreted as line
+     terminators.
+ 
+     When you compile any of the PCRE2 modules, you must specify
+     -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
+     sources.
+
+     An alternative approach is not to edit src/config.h, but to use -D on the
+     compiler command line to make any changes that you need to the
+     configuration options. In this case -DHAVE_CONFIG_H must not be set.
+
+     NOTE: There have been occasions when the way in which certain parameters
+     in src/config.h are used has changed between releases. (In the
+     configure/make world, this is handled automatically.) When upgrading to a
+     new release, you are strongly advised to review src/config.h.generic
+     before re-using what you had previously.
+
+ (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
+
+ (3) EITHER:
+       Copy or rename file src/pcre2_chartables.c.dist as
+       src/pcre2_chartables.c.
+
+     OR:
+       Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
+       if you have set up src/config.h), and then run it with the single
+       argument "src/pcre2_chartables.c". This generates a set of standard
+       character tables and writes them to that file. The tables are generated
+       using the default C locale for your system. If you want to use a locale
+       that is specified by LC_xxx environment variables, add the -L option to
+       the dftables command. You must use this method if you are building on a
+       system that uses EBCDIC code.
+
+     The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
+     specify alternative tables at run time.
+
+ (4) For an 8-bit library, compile the following source files, setting
+     -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set -DHAVE_CONFIG_H
+     if you have set up src/config.h with your configuration, or else use other
+     -D settings to change the configuration as required.
+
+       pcre2_auto_possess.c
+       pcre2_chartables.c
+       pcre2_compile.c
+       pcre2_config.c
+       pcre2_context.c 
+       pcre2_dfa_match.c
+       pcre2_error.c
+       pcre2_jit_compile.c
+       pcre2_jit_match.c
+       pcre2_jit_misc.c
+       pcre2_maketables.c
+       pcre2_match.c
+       pcre2_match_data.c
+       pcre2_newline.c
+       pcre2_ord2utf.c
+       pcre2_pattern_info.c
+       pcre2_string_utils.c
+       pcre2_study.c
+       pcre2_substring.c 
+       pcre2_tables.c
+       pcre2_ucd.c
+       pcre2_valid_utf.c
+       pcre2_xclass.c
+
+     Make sure that you include -I. in the compiler command (or equivalent for
+     an unusual compiler) so that all included PCRE2 header files are first
+     sought in the src directory under the current directory. Otherwise you run
+     the risk of picking up a previously-installed file from somewhere else.
+
+     Note that you must compile pcre2_jit_xxx.c, even if you have not defined
+     SUPPORT_JIT in src/config.h, because when JIT support is not configured,
+     dummy functions are compiled. When JIT support IS configured, the JIT
+     sources #include other files from the sljit subdirectory, where there
+     should be 16 files, all of whose names begin with "sljit".
+
+ (5) Now link all the compiled code into an object library in whichever form
+     your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
+     If your system has static and shared libraries, you may have to do this
+     once for each type.
+
+ (6) If you want to build a 16-bit library or 32-bit library (as well as, or
+     instead of the 8-bit library) just supply 16 or 32 as the value of
+     -DPCRE2_CODE_UNIT_WIDTH when you are compiling. 
+     
+ (7) If you want to build the POSIX wrapper functions (which apply only to the
+     8-bit library), ensure that you have the pcre2posix.h file and then
+     compile pcre2posix.c. Link the result (on its own) as the pcre2posix
+     library.
+
+ (8) The pcre2test program can be linked with any combination of the 8-bit,
+     16-bit and 32-bit libraries (depending on what you selected in
+     src/config.h). Compile pcre2test.c; don't forget -DHAVE_CONFIG_H if
+     necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
+     appropriate library/ies. If you compiled an 8-bit library, pcre2test also
+     needs the pcre2posix wrapper library.
+
+ (9) Run pcre2test on the testinput files in the testdata directory, and check
+     that the output matches the corresponding testoutput files. There are
+     comments about what each test does in the section entitled "Testing PCRE2"
+     in the README file. If you compiled more than one of the 8-bit, 16-bit and
+     32-bit libraries, you need to run pcre2test with the -16 option to do
+     16-bit tests and with the -32 option to do 32-bit tests.
+
+     Some tests are relevant only when certain build-time options are selected.
+     For example, test 4 is for Unicode support, and will not run if you have
+     built PCRE2 without it. See the comments at the start of each testinput
+     file. If you have a suitable Unix-like shell, the RunTest script will run
+     the appropriate tests for you. The command "RunTest list" will output a
+     list of all the tests.
+
+     Note that the supplied files are in Unix format, with just LF characters
+     as line terminators. You may need to edit them to change this if your
+     system uses a different convention.
+
+(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
+     by running pcre2test with the -jit option. This is done automatically by
+     the RunTest script. You might also like to build and run the freestanding
+     JIT test program, pcre2_jit_test.c.
+
+(11) If you want to use the pcre2grep command, compile and link pcre2grep.c; it
+     uses only the basic 8-bit PCRE2 library (it does not need the pcre2posix
+     library).
+
+
+BUILDING FOR VIRTUAL PASCAL
+
+FIXME FOR PCRE2
+
+A script for building PCRE2 using Borland's C++ compiler for use with VPASCAL
+was contributed by Alexander Tokarev. Stefan Weber updated the script and added
+additional files. The following files in the distribution are for building
+PCRE2 for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat,
+pcre2gexp.pas.
+
+
+STACK SIZE IN WINDOWS ENVIRONMENTS
+
+The default processor stack size of 1Mb in some Windows environments is too
+small for matching patterns that need much recursion. In particular, test 2 may
+fail because of this. Normally, running out of stack causes a crash, but there
+have been cases where the test program has just died silently. See your linker
+documentation for how to increase stack size if you experience problems. The
+Linux default of 8Mb is a reasonable choice for the stack, though even that can
+be too small for some pattern/subject combinations.
+
+PCRE2 has a compile configuration option to disable the use of stack for
+recursion so that heap is used instead. However, pattern matching is
+significantly slower when this is done. There is more about stack usage in the
+"pcre2stack" documentation.
+
+
+LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
+
+If you want to statically link a program against a PCRE2 library in the form of
+a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
+
+
+CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
+
+It is possible to compile programs to use different calling conventions using
+MSVC. Search the web for "calling conventions" for more information. To make it
+easier to change the calling convention for the exported functions in the
+PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
+definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
+not set, it defaults to empty; the default calling convention is then used
+(which is what is wanted most of the time).
+
+
+COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
+
+There are two ways of building PCRE2 using the "configure, make, make install"
+paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
+the same thing; they are completely different from each other. There is also
+support for building using CMake, which some users find a more straightforward
+way of building PCRE2 under Windows.
+
+The MinGW home page (http://www.mingw.org/) says this:
+
+  MinGW: A collection of freely available and freely distributable Windows
+  specific header files and import libraries combined with GNU toolsets that
+  allow one to produce native Windows programs that do not rely on any
+  3rd-party C runtime DLLs.
+
+The Cygwin home page (http://www.cygwin.com/) says this:
+
+  Cygwin is a Linux-like environment for Windows. It consists of two parts:
+
+  . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
+    substantial Linux API functionality
+
+  . A collection of tools which provide Linux look and feel.
+
+On both MinGW and Cygwin, PCRE2 should build correctly using:
+
+  ./configure && make && make install
+
+This should create two libraries called libpcre2-8 and libpcre2-posix. These
+are independent libraries: when you link with libpcre2-posix you must also link
+with libpcre2-8, which contains the basic functions.
+
+Using Cygwin's compiler generates libraries and executables that depend on
+cygwin1.dll. If a library that is generated this way is distributed,
+cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
+licence, this forces not only PCRE2 to be under the GPL, but also the entire
+application. A distributor who wants to keep their own code proprietary must
+purchase an appropriate Cygwin licence.
+
+MinGW has no such restrictions. The MinGW compiler generates a library or
+executable that can run standalone on Windows without any third party dll or
+licensing issues.
+
+But there is more complication:
+
+If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
+to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
+front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
+gcc and MinGW's gcc). So, a user can:
+
+. Build native binaries by using MinGW or by getting Cygwin and using
+  -mno-cygwin.
+
+. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
+  compiler flags.
+
+The test files that are supplied with PCRE2 are in UNIX format, with LF
+characters as line terminators. Unless your PCRE2 library uses a default
+newline option that includes LF as a valid newline, it may be necessary to
+change the line terminators in the test files to get some of the tests to work.
+
+
+BUILDING PCRE2 ON WINDOWS WITH CMAKE
+
+CMake is an alternative configuration facility that can be used instead of
+"configure". CMake creates project files (make files, solution files, etc.)
+tailored to numerous development environments, including Visual Studio,
+Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
+spaces in the names for your CMake installation and your PCRE2 source and build
+directories.
+
+The following instructions were contributed by a PCRE1 user, but they should 
+also work for PCRE2. If they are not followed exactly, errors may occur. In the
+event that errors do occur, it is recommended that you delete the CMake cache
+before attempting to repeat the CMake build process. In the CMake GUI, the
+cache can be deleted by selecting "File > Delete Cache".
+
+1.  Install the latest CMake version available from http://www.cmake.org/, and
+    ensure that cmake\bin is on your path.
+
+2.  Unzip (retaining folder structure) the PCRE2 source tree into a source
+    directory such as C:\pcre2. You should ensure your local date and time
+    is not earlier than the file dates in your source dir if the release is
+    very new.
+
+3.  Create a new, empty build directory, preferably a subdirectory of the
+    source dir. For example, C:\pcre2\pcre2-xx\build.
+
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
+    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
+    to start CMake from the Windows Start menu, as this can lead to errors.
+
+5.  Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
+    build directories, respectively.
+
+6.  Hit the "Configure" button.
+
+7.  Select the particular IDE / build tool that you are using (Visual
+    Studio, MSYS makefiles, MinGW makefiles, etc.)
+
+8.  The GUI will then list several configuration options. This is where
+    you can enable Unicode support or other PCRE2 optional features.
+
+9.  Hit "Configure" again. The adjacent "Generate" button should now be
+    active.
+
+10. Hit "Generate".
+
+11. The build directory should now contain a usable build system, be it a
+    solution file for Visual Studio, makefiles for MinGW, etc. Exit from
+    cmake-gui and use the generated build system with your compiler or IDE.
+    E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
+    solution, select the desired configuration (Debug, or Release, etc.) and
+    build the ALL_BUILD project.
+
+12. If during configuration with cmake-gui you've elected to build the test
+    programs, you can execute them by building the test project. E.g., for
+    MinGW: "make check"; for Visual Studio build the RUN_TESTS project. The
+    most recent build configuration is targeted by the tests. A summary of
+    test results is presented. Complete test output is subsequently
+    available for review in Testing\Temporary under your build dir.
+
+
+TESTING WITH RUNTEST.BAT  FIXME FIXME NOT YET TESTED/UPDATED FIXME
+
+If configured with CMake, building the test project ("make check" or building
+ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
+on your configuration options, possibly other test programs) in the build
+directory. Pcre2_test.bat runs RunTest.Bat with correct source and exe paths.
+
+For manual testing with RunTest.bat, provided the build dir is a subdirectory
+of the source directory: Open a command shell window. Chdir to the location
+of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
+"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
+
+To run only a particular test with RunTest.Bat provide a test number argument.
+
+Otherwise:
+
+1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
+   have been created.
+
+2. Edit RunTest.bat to identify the full or relative location of
+   the pcre2 source (in which the testdata folder resides), e.g.:
+
+   set srcdir=C:\pcre2\pcre2-10.00
+
+3. In a Windows command environment, chdir to the location of your bat and
+   exe programs.
+
+4. Run RunTest.bat. Test outputs will automatically be compared to expected
+   results, and discrepancies will be identified in the console output.
+
+To independently test the just-in-time compiler, run pcre2_jit_test.exe.
+
+
+BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
+
+z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
+The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
+applications can be supported through UNIX System Services, and in such an
+environment PCRE2 can be built in the same way as in other systems. However, in
+native z/OS (without UNIX System Services) and in z/VM, special ports are
+required. For details, please see this web site:
+
+  http://www.zaconsultants.net
+
+There is also a mirror here:
+
+  http://www.vsoft-software.com/downloads.html
+  
+The site currently has ports for PCRE1 releases, but PCRE2 should follow in due 
+course. 
+
+==========================
+Last Updated: 28 September 2014
diff --git a/doc/html/README.txt b/doc/html/README.txt
index 7ad597a..95c8747 100644
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@@ -1 +1,832 @@
-This is a placeholder README file for a work in progress.
+README file for PCRE2 (Perl-compatible regular expression library)
+------------------------------------------------------------------
+
+PCRE2 is a re-implementation of the original PCRE library with an entirely new 
+API. The latest release of PCRE2 is always available in three alternative
+formats from:
+
+FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
+
+  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.gz
+  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
+  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
+
+There is a mailing list for discussion about the development of PCRE (both the 
+original and new APIs) at pcre-dev@exim.org. You can access the archives and
+subscribe or manage your subscription here:
+
+   https://lists.exim.org/mailman/listinfo/pcre-dev
+
+Please read the NEWS file if you are upgrading from a previous release.
+The contents of this README file are:
+
+  The PCRE2 APIs
+  Documentation for PCRE2
+  Contributions by users of PCRE2
+  Building PCRE2 on non-Unix-like systems
+  Building PCRE2 without using autotools
+  Building PCRE2 using autotools
+  Retrieving configuration information
+  Shared libraries
+  Cross-compiling using autotools
+  Making new tarballs
+  Testing PCRE2
+  Character tables
+  File manifest
+
+
+The PCRE2 APIs
+--------------
+
+PCRE2 is written in C, and it has its own API. There are three sets of
+functions, one for the 8-bit library, which processes strings of bytes, one for
+the 16-bit library, which processes strings of 16-bit values, and one for the
+32-bit library, which processes strings of 32-bit values. As this is a new API, 
+there are as yet no C++ wrappers.
+
+The distribution does contain a set of C wrapper functions for the 8-bit
+library that are based on the POSIX regular expression API (see the pcre2posix
+man page). These end up in the library called libpcre2posix. Note that this
+just provides a POSIX calling interface to PCRE2; the regular expressions
+themselves still follow Perl syntax and semantics. The POSIX API is restricted,
+and does not give full access to all of PCRE2's facilities.
+
+The header file for the POSIX-style functions is called pcre2posix.h. The
+official POSIX name is regex.h, but I did not want to risk possible problems
+with existing files of that name by distributing it that way. To use PCRE2 with
+an existing program that uses the POSIX API, pcre2posix.h will have to be
+renamed or pointed at by a link.
+
+If you are using the POSIX interface to PCRE2 and there is already a POSIX
+regex library installed on your system, as well as worrying about the regex.h
+header file (as mentioned above), you must also take care when linking programs
+to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
+pick up the POSIX functions of the same name from the other library.
+
+One way of avoiding this confusion is to compile PCRE2 with the addition of
+-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
+compiler flags (CFLAGS if you are using "configure" -- see below). This has the
+effect of renaming the functions so that the names no longer clash. Of course,
+you have to do the same thing for your applications, or write them using the
+new names.
+
+
+Documentation for PCRE2
+----------------------
+
+If you install PCRE2 in the normal way on a Unix-like system, you will end up
+with a set of man pages whose names all start with "pcre2". The one that is
+just called "pcre2" lists all the others. In addition to these man pages, the
+PCRE2 documentation is supplied in two other forms:
+
+  1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
+     doc/pcre2test.txt in the source distribution. The first of these is a
+     concatenation of the text forms of all the section 3 man pages except the
+     listing of pcre2demo.c and those that summarize individual functions. The
+     other two are the text forms of the section 1 man pages for the pcre2grep
+     and pcre2test commands. These text forms are provided for ease of scanning
+     with text editors or similar tools. They are installed in
+     <prefix>/share/doc/pcre2, where <prefix> is the installation prefix
+     (defaulting to /usr/local).
+
+  2. A set of files containing all the documentation in HTML form, hyperlinked
+     in various ways, and rooted in a file called index.html, is distributed in
+     doc/html and installed in <prefix>/share/doc/pcre2/html.
+
+
+Building PCRE2 on non-Unix-like systems
+--------------------------------------
+
+For a non-Unix-like system, please read the comments in the file
+NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
+"make" you may be able to build PCRE2 using autotools in the same way as for
+many Unix-like systems.
+
+PCRE2 can also be configured using CMake, which can be run in various ways 
+(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
+NON-AUTOTOOLS-BUILD has information about CMake.
+
+PCRE2 has been compiled on many different operating systems. It should be
+straightforward to build PCRE2 on any system that has a Standard C compiler and
+library, because it uses only Standard C functions.
+
+
+Building PCRE2 without using autotools
+-------------------------------------
+
+The use of autotools (in particular, libtool) is problematic in some
+environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
+file for ways of building PCRE2 without using autotools.
+
+
+Building PCRE2 using autotools
+-----------------------------
+
+The following instructions assume the use of the widely used "configure; make;
+make install" (autotools) process.
+
+To build PCRE2 on a system that supports autotools, first run the "configure"
+command from the PCRE2 distribution directory, with your current directory set
+to the directory where you want the files to be created. This command is a
+standard GNU "autoconf" configuration script, for which generic instructions
+are supplied in the file INSTALL.
+
+Most commonly, people build PCRE2 within its own distribution directory, and in
+this case, on many systems, just running "./configure" is sufficient. However,
+the usual methods of changing standard defaults are available. For example:
+
+CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
+
+This command specifies that the C compiler should be run with the flags '-O2
+-Wall' instead of the default, and that "make install" should install PCRE2
+under /opt/local instead of the default /usr/local.
+
+If you want to build in a different directory, just run "configure" with that
+directory as current. For example, suppose you have unpacked the PCRE2 source
+into /source/pcre2/pcre2-xxx, but you want to build it in
+/build/pcre2/pcre2-xxx:
+
+cd /build/pcre2/pcre2-xxx
+/source/pcre2/pcre2-xxx/configure
+
+PCRE2 is written in C and is normally compiled as a C library. However, it is
+possible to build it as a C++ library, though the provided building apparatus
+does not have any features to support this.
+
+There are some optional features that can be included or omitted from the PCRE2
+library. They are also documented in the pcre2build man page.
+
+. By default, both shared and static libraries are built. You can change this
+  by adding one of these options to the "configure" command:
+
+  --disable-shared
+  --disable-static
+
+  (See also "Shared libraries" below.)
+
+. By default, only the 8-bit library is built. If you add --enable-pcre16 to
+  the "configure" command, the 16-bit library is also built. If you add
+  --enable-pcre32 to the "configure" command, the 32-bit library is also built.
+  If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
+  building the 8-bit library.
+
+. If you want to include support for just-in-time compiling, which can give
+  large performance improvements on certain platforms, add --enable-jit to the
+  "configure" command. This support is available only for certain hardware
+  architectures. If you try to enable it on an unsupported architecture, there
+  will be a compile time error.  FIXME: NOT YET IMPLEMENTED.
+
+. When JIT support is enabled, pcre2grep automatically makes use of it, unless
+  you add --disable-pcre2grep-jit to the "configure" command.
+
+. If you want to make use of the support for UTF-8 Unicode character strings in
+  the 8-bit library, UTF-16 Unicode character strings in the 16-bit library,
+  and UTF-32 Unicode character strings in the 32-bit library, you must add
+  --enable-unicode to the "configure" command. Without it, the code for
+  handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
+  configure one library with UTF support and the other without in the same
+  configuration.
+ 
+  Even when --enable-unicode is included, the use of a UTF encoding still has
+  to be enabled by an option at run time. When PCRE2 is compiled with this
+  option, its input can only either be ASCII or UTF-8/16/32, even when running
+  on EBCDIC platforms. It is not possible to use both --enable-unicode and
+  --enable-ebcdic at the same time.
+  
+  When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
+  includes support for the \P, \p, and \X sequences that recognize Unicode
+  character properties. However, only the basic two-letter properties such as
+  Lu are supported.
+
+. You can build PCRE2 to recognize either CR or LF or the sequence CRLF or any
+  of the preceding, or any of the Unicode newline sequences as indicating the
+  end of a line. Whatever you specify at build time is the default; the caller
+  of PCRE2 can change the selection at run time. The default newline indicator
+  is a single LF character (the Unix standard). You can specify the default
+  newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
+  or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
+  --enable-newline-is-any to the "configure" command, respectively.
+
+  If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
+  the standard tests will fail, because the lines in the test files end with
+  LF. Even if the files are edited to change the line endings, there are likely
+  to be some failures. With --enable-newline-is-anycrlf or
+  --enable-newline-is-any, many tests should succeed, but there may be some
+  failures.
+
+. By default, the sequence \R in a pattern matches any Unicode line ending
+  sequence. This is independent of the option specifying what PCRE2 considers
+  to be the end of a line (see above). However, the caller of PCRE2 can
+  restrict \R to match only CR, LF, or CRLF. You can make this the default by
+  adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
+
+. PCRE2 has a counter that limits the depth of nesting of parentheses in a
+  pattern. This limits the amount of system stack that a pattern uses when it
+  is compiled. The default is 250, but you can change it by setting, for
+  example,
+
+  --with-parens-nest-limit=500
+
+. PCRE2 has a counter that can be set to limit the amount of resources it uses
+  when matching a pattern. If the limit is exceeded during a match, the match
+  fails. The default is ten million. You can change the default by setting, for
+  example,
+
+  --with-match-limit=500000
+
+  on the "configure" command. This is just the default; individual calls to
+  pcre2_match() can supply their own value. There is more discussion on the
+  pcre2api man page.
+
+. There is a separate counter that limits the depth of recursive function calls
+  during a matching process. This also has a default of ten million, which is
+  essentially "unlimited". You can change the default by setting, for example,
+
+  --with-match-limit-recursion=500000
+
+  Recursive function calls use up the runtime stack; running out of stack can
+  cause programs to crash in strange ways. There is a discussion about stack
+  sizes in the pcre2stack man page.
+
+. In the 8-bit library, the default maximum compiled pattern size is around 
+  64K. You can increase this by adding --with-link-size=3 to the "configure"
+  command. PCRE2 then uses three bytes instead of two for offsets to different
+  parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
+  the same as --with-link-size=4, which (in both libraries) uses four-byte
+  offsets. Increasing the internal link size reduces performance. In the 32-bit
+  library, the link size setting is ignored, as 4-byte offsets are always used.
+
+. You can build PCRE2 so that its internal match() function that is called from
+  pcre2_match() does not call itself recursively. Instead, it uses memory
+  blocks obtained from the heap to save data that would otherwise be saved on
+  the stack. To build PCRE2 like this, use
+
+  --disable-stack-for-recursion
+
+  on the "configure" command. PCRE2 runs more slowly in this mode, but it may
+  be necessary in environments with limited stack sizes. This applies only to
+  the normal execution of the pcre2_match() function; if JIT support is being
+  successfully used, it is not relevant. Equally, it does not apply to
+  pcre2_dfa_match(), which does not use deeply nested recursion. There is a
+  discussion about stack sizes in the pcre2stack man page.
+
+. For speed, PCRE2 uses four tables for manipulating and identifying characters
+  whose code point values are less than 256. By default, it uses a set of
+  tables for ASCII encoding that is part of the distribution. If you specify
+
+  --enable-rebuild-chartables
+
+  a program called dftables is compiled and run in the default C locale when
+  you obey "make". It builds a source file called pcre2_chartables.c. If you do
+  not specify this option, pcre2_chartables.c is created as a copy of
+  pcre2_chartables.c.dist. See "Character tables" below for further
+  information.
+
+. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
+  character code (as opposed to ASCII/Unicode) by specifying
+
+  --enable-ebcdic
+
+  This automatically implies --enable-rebuild-chartables (see above). However,
+  when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
+  both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
+  which specifies that the code value for the EBCDIC NL character is 0x25
+  instead of the default 0x15.
+
+. In environments where valgrind is installed, if you specify
+
+  --enable-valgrind
+
+  PCRE2 will use valgrind annotations to mark certain memory regions as
+  unaddressable. This allows it to detect invalid memory accesses, and is
+  mostly useful for debugging PCRE2 itself.
+
+. In environments where the gcc compiler is used and lcov version 1.6 or above
+  is installed, if you specify
+
+  --enable-coverage
+
+  the build process implements a code coverage report for the test suite. The
+  report is generated by running "make coverage". If ccache is installed on
+  your system, it must be disabled when building PCRE2 for coverage reporting.
+  You can do this by setting the environment variable CCACHE_DISABLE=1 before
+  running "make" to build PCRE2. There is more information about coverage
+  reporting in the "pcre2build" documentation.
+
+. The pcre2grep program currently supports only 8-bit data files, and so
+  requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
+  libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
+  specifying one or both of
+
+  --enable-pcre2grep-libz
+  --enable-pcre2grep-libbz2
+
+  Of course, the relevant libraries must be installed on your system.
+
+. The default size (in bytes) of the internal buffer used by pcre2grep can be
+  set by, for example:
+
+  --with-pcre2grep-bufsize=51200
+
+  The value must be a plain integer. The default is 20480.
+
+. It is possible to compile pcre2test so that it links with the libreadline
+  or libedit libraries, by specifying, respectively,
+
+  --enable-pcre2test-libreadline or --enable-pcre2test-libedit
+
+  If this is done, when pcre2test's input is from a terminal, it reads it using
+  the readline() function. This provides line-editing and history facilities.
+  Note that libreadline is GPL-licenced, so if you distribute a binary of
+  pcre2test linked in this way, there may be licensing issues. These can be
+  avoided by linking with libedit (which has a BSD licence) instead.
+
+  Enabling libreadline causes the -lreadline option to be added to the
+  pcre2test build. In many operating environments with a system-installed
+  readline library this is sufficient. However, in some environments (e.g. if
+  an unmodified distribution version of readline is in use), it may be
+  necessary to specify something like LIBS="-lncurses" as well. This is
+  because, to quote the readline INSTALL, "Readline uses the termcap functions,
+  but does not link with the termcap or curses library itself, allowing
+  applications which link with readline the to choose an appropriate library."
+  If you get error messages about missing functions tgetstr, tgetent, tputs,
+  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
+  should fix it.
+
+The "configure" script builds the following files for the basic C library:
+
+. Makefile             the makefile that builds the library
+. src/config.h         build-time configuration options for the library
+. src/pcre2.h          the public PCRE2 header file
+. pcre2-config          script that shows the building settings such as CFLAGS
+                         that were set for "configure"
+. libpcre2-8.pc        ) 
+. libpcre2-16.pc       ) data for the pkg-config command
+. libpcre2-32.pc       )
+. libpcre2-posix.pc    )
+. libtool              script that builds shared and/or static libraries
+
+Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
+tarballs under the names config.h.generic and pcre2.h.generic. These are
+provided for those who have to build PCRE2 without using "configure" or CMake.
+If you use "configure" or CMake, the .generic versions are not used.
+
+The "configure" script also creates config.status, which is an executable
+script that can be run to recreate the configuration, and config.log, which
+contains compiler output from tests that "configure" runs.
+
+Once "configure" has run, you can run "make". This builds whichever of the
+libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
+program called pcre2test. If you enabled JIT support with --enable-jit, another
+test program called pcre2_jit_test is built as well. FIXME: still to be
+implemented. If the 8-bit library is built, libpcre2-posix and the pcre2grep
+command are also built.
+
+The command "make check" runs all the appropriate tests. Details of the PCRE2
+tests are given below in a separate section of this document.
+
+You can use "make install" to install PCRE2 into live directories on your
+system. The following are installed (file names are all relative to the
+<prefix> that is set when "configure" is run):
+
+  Commands (bin):
+    pcre2test
+    pcre2grep (if 8-bit support is enabled)
+    pcre2-config
+
+  Libraries (lib):
+    libpcre2-8      (if 8-bit support is enabled)
+    libpcre2-16     (if 16-bit support is enabled)
+    libpcre2-32     (if 32-bit support is enabled)
+    libpcre2-posix  (if 8-bit support is enabled)
+
+  Configuration information (lib/pkgconfig):
+    libpcre2-8.pc
+    libpcre2-16.pc
+    libpcre2-32.pc
+    libpcre2-posix.pc
+
+  Header files (include):
+    pcre2.h
+    pcre2posix.h
+
+  Man pages (share/man/man{1,3}):
+    pcre2grep.1
+    pcre2test.1
+    pcre2-config.1
+    pcre2.3
+    pcre2*.3 (lots more pages, all starting "pcre2")
+
+  HTML documentation (share/doc/pcre2/html):
+    index.html
+    *.html (lots more pages, hyperlinked from index.html)
+
+  Text file documentation (share/doc/pcre2):
+    AUTHORS
+    COPYING
+    ChangeLog
+    LICENCE
+    NEWS
+    README
+    pcre2.txt         (a concatenation of the man(3) pages)
+    pcre2test.txt     the pcre2test man page
+    pcre2grep.txt     the pcre2grep man page
+    pcre2-config.txt  the pcre2-config man page
+
+If you want to remove PCRE2 from your system, you can run "make uninstall".
+This removes all the files that "make install" installed. However, it does not
+remove any directories, because these are often shared with other programs.
+
+
+Retrieving configuration information
+------------------------------------
+
+Running "make install" installs the command pcre2-config, which can be used to
+recall information about the PCRE2 configuration and installation. For example:
+
+  pcre2-config --version
+
+prints the version number, and
+
+  pcre2-config --libs8
+
+outputs information about where the 8-bit library is installed. This command
+can be included in makefiles for programs that use PCRE2, saving the programmer
+from having to remember too many details. Run pcre2-config with no arguments to 
+obtain a list of possible arguments.
+
+The pkg-config command is another system for saving and retrieving information
+about installed libraries. Instead of separate commands for each library, a
+single command is used. For example:
+
+  pkg-config --libs libpcre2-16
+
+The data is held in *.pc files that are installed in a directory called
+<prefix>/lib/pkgconfig.
+
+
+Shared libraries
+----------------
+
+The default distribution builds PCRE2 as shared libraries and static libraries,
+as long as the operating system supports shared libraries. Shared library
+support relies on the "libtool" script which is built as part of the
+"configure" process.
+
+The libtool script is used to compile and link both shared and static
+libraries. They are placed in a subdirectory called .libs when they are newly
+built. The programs pcre2test and pcre2grep are built to use these uninstalled
+libraries (by means of wrapper scripts in the case of shared libraries). When
+you use "make install" to install shared libraries, pcre2grep and pcre2test are
+automatically re-built to use the newly installed shared libraries before being
+installed themselves. However, the versions left in the build directory still
+use the uninstalled libraries.
+
+To build PCRE2 using static libraries only you must use --disable-shared when
+configuring it. For example:
+
+./configure --prefix=/usr/gnu --disable-shared
+
+Then run "make" in the usual way. Similarly, you can use --disable-static to
+build only shared libraries.
+
+
+Cross-compiling using autotools
+-------------------------------
+
+You can specify CC and CFLAGS in the normal way to the "configure" command, in
+order to cross-compile PCRE2 for some other host. However, you should NOT
+specify --enable-rebuild-chartables, because if you do, the dftables.c source
+file is compiled and run on the local host, in order to generate the inbuilt
+character tables (the pcre2_chartables.c file). This will probably not work,
+because dftables.c needs to be compiled with the local compiler, not the cross
+compiler.
+
+When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
+created by making a copy of pcre2_chartables.c.dist, which is a default set of
+tables that assumes ASCII code. Cross-compiling with the default tables should
+not be a problem.
+
+If you need to modify the character tables when cross-compiling, you should
+move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
+and run it on the local host to make a new version of pcre2_chartables.c.dist.
+Then when you cross-compile PCRE2 this new version of the tables will be used.
+
+
+Making new tarballs
+-------------------
+
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.
+
+If you have modified any of the man page sources in the doc directory, you
+should first run the PrepareRelease script before making a distribution. This
+script creates the .txt and HTML forms of the documentation from the man pages.
+
+
+Testing PCRE2
+-------------
+
+To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
+There is another script called RunGrepTest that tests the options of the
+pcre2grep command. When JIT support is enabled, another test program called
+pcre2_jit_test is built. Both the scripts and all the program tests are run if
+you obey "make check". For other environments, see the instructions in
+NON-AUTOTOOLS-BUILD.
+
+The RunTest script runs the pcre2test test program (which is documented in its
+own man page) on each of the relevant testinput files in the testdata
+directory, and compares the output with the contents of the corresponding
+testoutput files. RunTest uses a file called testtry to hold the main output
+from pcre2test. Other files whose names begin with "test" are used as working
+files in some tests.
+
+Some tests are relevant only when certain build-time options were selected. For
+example, the tests for UTF-8/16/32 support are run only if --enable-unicode was
+used. RunTest outputs a comment when it skips a test.
+
+Many of the tests that are not skipped are run twice if JIT support is
+available. On the second run, JIT compilation is forced. This testing can be
+suppressed by putting "nojit" on the RunTest command line.
+
+The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
+libraries that are enabled. If you want to run just one set of tests, call
+RunTest with either the -8, -16 or -32 option.
+
+If valgrind is installed, you can run the tests under it by putting "valgrind"
+on the RunTest command line. To run pcre2test on just one or more specific test
+files, give their numbers as arguments to RunTest, for example:
+
+  RunTest 2 7 11
+
+You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
+end), or a number preceded by ~ to exclude a test. For example:
+
+  RunTest 3-15 ~10
+
+This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
+except test 13. Whatever order the arguments are in, the tests are always run
+in numerical order.
+
+You can also call RunTest with the single argument "list" to cause it to output
+a list of tests.
+
+The first two tests can always be run, as they expect only plain text strings
+(not UTF) and make no use of Unicode properties. The first test file can be fed
+directly into the perltest.pl script to check that Perl gives the same results.
+The only difference you should see is in the first few lines, where the Perl
+version is given instead of the PCRE2 version. The second set of tests checks
+auxiliary functions, error detection, and run-time flags that are specific to
+PCRE2, as well as the POSIX wrapper API. It also uses the debugging flags to
+check some of the internals of pcre2_compile().
+
+If you build PCRE2 with a locale setting that is not the standard C locale, the
+character tables may be different (see next paragraph). In some cases, this may
+cause failures in the second set of tests. For example, in a locale where the
+isprint() function yields TRUE for characters in the range 128-255, the use of
+[:isascii:] inside a character class defines a different set of characters, and
+this shows up in this test as a difference in the compiled code, which is being
+listed for checking. Where the comparison test output contains [\x00-\x7f] the
+test will contain [\x00-\xff], and similarly in some other cases. This is not a
+bug in PCRE2.
+
+The third set of tests checks pcre2_maketables(), the facility for building a
+set of character tables for a specific locale and using them instead of the
+default tables. The script uses the "locale" command to check for the 
+availability of the "fr_FR", "french", or "fr" locale, and uses the first one
+that it finds. If the "locale" command fails, or if its output doesn't include
+"fr_FR", "french", or "fr" in the list of available locales, the third test
+cannot be run, and a comment is output to say why. If running this test
+produces an error like this
+
+  ** Failed to set locale "fr_FR"
+
+it means that the given locale is not available on your system, despite being
+listed by "locale". This does not mean that PCRE2 is broken. There are three
+alternative output files for the third test, because three different versions
+of the French locale have been encountered. The test passes if its output
+matches any one of them.
+
+The fourth and fifth tests check UTF and Unicode property support, the fourth
+being compatible with the perltest.pl script, and the fifth checking 
+PCRE2-specific things.
+
+The sixth and seventh tests check the pcre2_dfa_match() alternative matching
+function, in non-UTF mode and UTF-mode with Unicode property support,
+respectively.
+
+The eighth test checks some internal offsets and code size features; it is
+run only when the default "link size" of 2 is set (in other cases the sizes
+change) and when Unicode support is enabled.
+
+The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
+twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
+generate different output in 8-bit mode. Each pair are for general cases and
+Unicode support, respectively. The thirteenth test checks the handling of 
+non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit 
+modes.
+
+The fourteenth test is run only when JIT support is not available, and the
+fifteenth test is run only when JIT support is available. They test some
+JIT-specific features such as information output from pcre2test about JIT
+compilation.
+
+The sixteenth and seventeenth tests are run only in 8-bit mode. They check the 
+POSIX interface to the 8-bit library, without and with Unicode support,
+respectively.
+
+
+Character tables
+----------------
+
+For speed, PCRE2 uses four tables for manipulating and identifying characters
+whose code point values are less than 256. By default, a set of tables that is
+built into the library is used. The pcre2_maketables() function can be called
+by an application to create a new set of tables in the current locale. These are
+passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
+compile context.
+
+The source file called pcre2_chartables.c contains the default set of tables.
+By default, this is created as a copy of pcre2_chartables.c.dist, which
+contains tables for ASCII coding. However, if --enable-rebuild-chartables is
+specified for ./configure, a different version of pcre2_chartables.c is built
+by the program dftables (compiled from dftables.c), which uses the ANSI C
+character handling functions such as isalnum(), isalpha(), isupper(),
+islower(), etc. to build the table sources. This means that the default C
+locale which is set for your system will control the contents of these default
+tables. You can change the default tables by editing pcre2_chartables.c and
+then re-building PCRE2. If you do this, you should take care to ensure that the
+file does not get automatically re-generated. The best way to do this is to
+move pcre2_chartables.c.dist out of the way and replace it with your customized
+tables.
+
+When the dftables program is run as a result of --enable-rebuild-chartables,
+it uses the default C locale that is set on your system. It does not pay
+attention to the LC_xxx environment variables. In other words, it uses the
+system's default locale rather than whatever the compiling user happens to have
+set. If you really do want to build a source set of character tables in a
+locale that is specified by the LC_xxx variables, you can run the dftables
+program by hand with the -L option. For example:
+
+  ./dftables -L pcre2_chartables.c.special
+
+The first two 256-byte tables provide lower casing and case flipping functions,
+respectively. The next table consists of three 32-byte bit maps which identify
+digits, "word" characters, and white space, respectively. These are used when
+building 32-byte bit maps that represent character classes for code points less
+than 256. The final 256-byte table has bits indicating various character types,
+as follows:
+
+    1   white space character
+    2   letter
+    4   decimal digit
+    8   hexadecimal digit
+   16   alphanumeric or '_'
+  128   regular expression metacharacter or binary zero
+
+You should not alter the set of characters that contain the 128 bit, as that
+will cause PCRE2 to malfunction.
+
+
+File manifest
+-------------
+
+The distribution should contain the files listed below. 
+
+(A) Source files for the PCRE2 library functions and their headers are found in 
+    the src directory:
+
+  src/dftables.c           auxiliary program for building pcre2_chartables.c
+                           when --enable-rebuild-chartables is specified
+
+  src/pcre2_chartables.c.dist  a default set of character tables that assume
+                           ASCII coding; unless --enable-rebuild-chartables is
+                           specified, used by copying to pcre2_chartables.c
+
+  src/pcre2posix.c         )
+  src/pcre2_auto_possess.c ) 
+  src/pcre2_compile.c      )
+  src/pcre2_config.c       )
+  src/pcre2_context.c      ) 
+  src/pcre2_dfa_match.c    )
+  src/pcre2_error.c        ) 
+  src/pcre2_exec.c         )
+  src/pcre2_jit_compile.c  )
+  src/pcre2_jit_match.c    ) sources for the functions in the library,  
+  src/pcre2_jit_misc.c     )   and some internal functions that they use
+  src/pcre2_maketables.c   )
+  src/pcre2_match.c        )
+  src/pcre2_match_data.c   )  
+  src/pcre2_newline.c      )
+  src/pcre2_ord2utf.c      )
+  src/pcre2_pattern_info.c )
+  src/pcre2_string_utils.c )
+  src/pcre2_study.c        )
+  src/pcre2_substring.c    ) 
+  src/pcre2_tables.c       )
+  src/pcre2_ucd.c          )
+  src/pcre2_valid_utf.c    )
+  src/pcre2_xclass.c       )
+
+  src/pcre2_printint.c     debugging function that is used by pcre2test
+
+  src/config.h.in          template for config.h, when built by "configure"
+  src/pcre2.h.in           template for pcre2.h when built by "configure"
+  src/pcre2posix.h         header for the external POSIX wrapper API
+  src/pcre2_internal.h     header for internal use
+  src/pcre2_intmodedep.h   a mode-specific internal header
+  src/pcre2_ucp.h          header for Unicode property handling
+
+  sljit/*                  16 files that make up the JIT compiler FIXME
+
+(B) Source files for programs that use PCRE2:
+
+  src/pcre2demo.c          simple demonstration of coding calls to PCRE2
+  src/pcre2grep.c          source of a grep utility that uses PCRE2
+  src/pcre2test.c          comprehensive test program
+
+(C) Auxiliary files:
+
+  132html                  script to turn "man" pages into HTML
+  AUTHORS                  information about the author of PCRE2
+  ChangeLog                log of changes to the code
+  CleanTxt                 script to clean nroff output for txt man pages
+  Detrail                  script to remove trailing spaces
+  HACKING                  some notes about the internals of PCRE2
+  INSTALL                  generic installation instructions
+  LICENCE                  conditions for the use of PCRE2
+  COPYING                  the same, using GNU's standard name
+  Makefile.in              ) template for Unix Makefile, which is built by
+                           )   "configure"
+  Makefile.am              ) the automake input that was used to create
+                           )   Makefile.in
+  NEWS                     important changes in this release
+  NON-AUTOTOOLS-BUILD      notes on building PCRE2 without using autotools
+  PrepareRelease           script to make preparations for "make dist"
+  README                   this file
+  RunTest                  a Unix shell script for running tests
+  RunGrepTest              a Unix shell script for pcre2grep tests
+  aclocal.m4               m4 macros (generated by "aclocal")
+  config.guess             ) files used by libtool,
+  config.sub               )   used only when building a shared library
+  configure                a configuring shell script (built by autoconf)
+  configure.ac             ) the autoconf input that was used to build
+                           )   "configure" and config.h
+  depcomp                  ) script to find program dependencies, generated by
+                           )   automake
+  doc/*.3                  man page sources for PCRE2
+  doc/*.1                  man page sources for pcre2grep and pcre2test
+  doc/index.html.src       the base HTML page
+  doc/html/*               HTML documentation
+  doc/pcre2.txt            plain text version of the man pages
+  doc/pcre2test.txt        plain text documentation of test program
+  doc/perltest.txt         plain text documentation of Perl test program
+  install-sh               a shell script for installing files
+  libpcre2-8.pc.in         template for libpcre2-8.pc for pkg-config
+  libpcre2-16.pc.in        template for libpcre2-16.pc for pkg-config
+  libpcre2-32.pc.in        template for libpcre2-32.pc for pkg-config
+  libpcre2-posix.pc.in     template for libpcre2-posix.pc for pkg-config
+  ltmain.sh                file used to build a libtool script
+  missing                  ) common stub for a few missing GNU programs while
+                           )   installing, generated by automake
+  mkinstalldirs            script for making install directories
+  perltest.pl              Perl test program
+  pcre2-config.in          source of script which retains PCRE2 information
+  pcre2_jit_test.c         test program for the JIT compiler
+  testdata/testinput*      test data for main library tests
+  testdata/testoutput*     expected test results
+  testdata/grep*           input and output for pcre2grep tests
+  testdata/*               other supporting test files
+
+(D) Auxiliary files for cmake support
+
+  cmake/COPYING-CMAKE-SCRIPTS
+  cmake/FindPackageHandleStandardArgs.cmake
+  cmake/FindEditline.cmake
+  cmake/FindReadline.cmake
+  CMakeLists.txt
+  config-cmake.h.in
+
+(E) Auxiliary files for VPASCAL  FIXME FIXME
+
+  makevp.bat
+  makevp_c.txt
+  makevp_l.txt
+  pcre2gexp.pas
+
+(F) Auxiliary files for building PCRE2 "by hand"
+
+  pcre2.h.generic         ) a version of the public PCRE2 header file
+                          )   for use in non-"configure" environments
+  config.h.generic        ) a version of config.h for use in non-"configure"
+                          )   environments
+
+(G) Miscellaneous
+
+  RunTest.bat            a script for running tests under Windows  FIXME
+
+Philip Hazel
+Email local part: ph10
+Email domain: cam.ac.uk
+Last updated: 27 October 2014
diff --git a/doc/html/pcre2-config.html b/doc/html/pcre2-config.html
new file mode 100644
index 0000000..b71d760
--- /dev/null
+++ b/doc/html/pcre2-config.html
@@ -0,0 +1,102 @@
+<html>
+<head>
+<title>pcre2-config specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2-config man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">OPTIONS</a>
+<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
+<li><a name="TOC5" href="#SEC5">AUTHOR</a>
+<li><a name="TOC6" href="#SEC6">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>pcre2-config  [--prefix] [--exec-prefix] [--version]</b>
+<b>             [--libs8] [--libs16] [--libs32] [--libs-posix]</b>
+<b>             [--cflags] [--cflags-posix]</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+<b>pcre2-config</b> returns the configuration of the installed PCRE2 libraries
+and the options required to compile a program to use them. Some of the options
+apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
+not available for libraries that have not been built. If an unavailable option
+is encountered, the "usage" information is output.
+</P>
+<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
+<P>
+<b>--prefix</b>
+Writes the directory prefix used in the PCRE2 installation for architecture
+independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
+systems) to the standard output.
+</P>
+<P>
+<b>--exec-prefix</b>
+Writes the directory prefix used in the PCRE2 installation for architecture
+dependent files (normally the same as <b>--prefix</b>) to the standard output.
+</P>
+<P>
+<b>--version</b>
+Writes the version number of the installed PCRE2 libraries to the standard
+output.
+</P>
+<P>
+<b>--libs8</b>
+Writes to the standard output the command line options required to link
+with the 8-bit PCRE2 library (<b>-lpcre2-8</b> on many systems).
+</P>
+<P>
+<b>--libs16</b>
+Writes to the standard output the command line options required to link
+with the 16-bit PCRE2 library (<b>-lpcre2-16</b> on many systems).
+</P>
+<P>
+<b>--libs32</b>
+Writes to the standard output the command line options required to link
+with the 32-bit PCRE2 library (<b>-lpcre2-32</b> on many systems).
+</P>
+<P>
+<b>--libs-posix</b>
+Writes to the standard output the command line options required to link with
+PCRE2's POSIX API wrapper library (<b>-lpcre2-posix</b> <b>-lpcre2-8</b> on many
+systems).
+</P>
+<P>
+<b>--cflags</b>
+Writes to the standard output the command line options required to compile
+files that use PCRE2 (this may include some <b>-I</b> options, but is blank on
+many systems).
+</P>
+<P>
+<b>--cflags-posix</b>
+Writes to the standard output the command line options required to compile
+files that use PCRE2's POSIX API wrapper library (this may include some
+<b>-I</b> options, but is blank on many systems).
+</P>
+<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2(3)</b>
+</P>
+<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
+<P>
+This manual page was originally written by Mark Baker for the Debian GNU/Linux
+system. It has been subsequently revised as a generic PCRE2 man page.
+</P>
+<br><a name="SEC6" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html
new file mode 100644
index 0000000..f8672b8
--- /dev/null
+++ b/doc/html/pcre2.html
@@ -0,0 +1,182 @@
+<html>
+<head>
+<title>pcre2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2 man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
+<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
+<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
+<li><a name="TOC4" href="#SEC4">AUTHOR</a>
+<li><a name="TOC5" href="#SEC5">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
+<P>
+PCRE2 is the name used for a revised API for the PCRE library, which is a set
+of functions, written in C, that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences. Some
+features that appeared in Python and the original PCRE before they appeared in
+Perl are also available using the Python syntax, there is some support for one
+or two .NET and Oniguruma syntax items, and there are options for requesting
+some minor changes that give better ECMAScript (aka JavaScript) compatibility.
+</P>
+<P>
+The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
+code units, which means that up to three separate libraries may be installed.
+The original work to extend PCRE to 16-bit and 32-bit code units was done by
+Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
+can be interpreted either as one character per code unit, or as UTF-encoded
+Unicode, with support for Unicode general category properties. Unicode is
+optional at build time, and must be enabled explicitly at run time. The version
+of Unicode in use can be discovered by running
+<pre>
+  pcre2test -C
+</PRE>
+</P>
+<P>
+The three libraries contain identical sets of functions, with names ending in 
+_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However, 
+by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just 
+one code unit width can be written using generic names such as
+<b>pcre2_compile()</b>, and the documentation is written assuming that this is 
+the case.
+</P>
+<P>
+In addition to the Perl-compatible matching function, PCRE2 contains an
+alternative function that matches the same compiled patterns in a different
+way. In certain circumstances, the alternative function has some advantages.
+For a discussion of the two matching algorithms, see the
+<a href="pcre2matching.html"><b>pcre2matching</b></a>
+page.
+</P>
+<P>
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE2 are given in separate documents. See the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+and
+<a href="pcre2compat.html"><b>pcre2compat</b></a>
+pages. There is a syntax summary in the
+<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
+page.
+</P>
+<P>
+Some features of PCRE2 can be included, excluded, or changed when the library
+is built. The
+<a href="pcre2_config.html"><b>pcre2_config()</b></a>
+function makes it possible for a client to discover which features are
+available. The features themselves are described in the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+page. Documentation about building PCRE2 for various operating systems can be
+found in the
+<a href="README.txt"><b>README</b></a>
+and
+<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
+files in the source distribution.
+</P>
+<P>
+The libraries contain a number of undocumented internal functions and data
+tables that are used by more than one of the exported external functions, but
+which are not intended for use by external callers. Their names all begin with
+"_pcre2", which hopefully will not provoke any name clashes. In some
+environments, it is possible to control which external symbols are exported
+when a shared library is built, and in these cases the undocumented symbols are
+not exported.
+</P>
+<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
+<P>
+If you are using PCRE2 in a non-UTF application that permits users to supply
+arbitrary patterns for compilation, you should be aware of a feature that
+allows users to turn on UTF support from within a pattern, provided that PCRE2
+was built with Unicode support. For example, an 8-bit pattern that begins with
+"(*UTF)" turns on UTF-8 mode, which interprets patterns and subjects as strings
+of UTF-8 code units instead of individual 8-bit characters. This causes both
+the pattern and any data against which it is matched to be checked for UTF-8
+validity. If the data string is very long, such a check might use sufficiently
+many resources as to cause your application to lose performance.
+</P>
+<P>
+One way of guarding against this possibility is to use the
+<b>pcre2_pattern_info()</b> function to check the compiled pattern's options for
+UTF. Alternatively, you can set the PCRE2_NEVER_UTF option at compile time.
+This causes a compile time error if a pattern contains a UTF-setting sequence.
+</P>
+<P>
+If your application is one that supports UTF, be aware that validity checking
+can take time. If the same data string is to be matched many times, you can use
+the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
+running redundant checks.
+</P>
+<P>
+Another way that performance can be hit is by running a pattern that has a very
+large search tree against a string that will never match. Nested unlimited
+repeats in a pattern are a common example. PCRE2 provides some protection
+against this: see the <b>pcre2_set_match_limit()</b> function in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page.
+</P>
+<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
+<P>
+The user documentation for PCRE2 comprises a number of different sections. In
+the "man" format, each of these is a separate "man page". In the HTML format,
+each is a separate page, linked from the index page. In the plain text format,
+the descriptions of the <b>pcre2grep</b> and <b>pcre2test</b> programs are in
+files called <b>pcre2grep.txt</b> and <b>pcre2test.txt</b>, respectively. The
+remaining sections, except for the <b>pcre2demo</b> section (which is a program
+listing), and the short pages for individual functions, are concatenated in
+<b>pcre2.txt</b>, for ease of searching. The sections are as follows:
+<pre>
+  pcre2              this document FIXME CHECK THIS LIST
+  pcre2-config       show PCRE2 installation configuration information
+  pcre2api           details of PCRE2's native C API
+  pcre2build         building PCRE2
+  pcre2callout       details of the callout feature
+  pcre2compat        discussion of Perl compatibility
+  pcre2demo          a demonstration C program that uses PCRE2
+  pcre2grep          description of the <b>pcre2grep</b> command (8-bit only)
+  pcre2jit           discussion of the just-in-time optimization support
+  pcre2limits        details of size and other limits
+  pcre2matching      discussion of the two matching algorithms
+  pcre2partial       details of the partial matching facility
+  pcre2pattern       syntax and semantics of supported regular expressions
+  pcre2perform       discussion of performance issues
+  pcre2posix         the POSIX-compatible C API for the 8-bit library
+  pcre2sample        discussion of the pcre2demo program
+  pcre2stack         discussion of stack usage
+  pcre2syntax        quick syntax reference
+  pcre2test          description of the <b>pcre2test</b> testing command
+  pcre2unicode       discussion of Unicode and UTF support
+</pre>
+In the "man" and HTML formats, there is also a short page for each C library
+function, listing its arguments and results.
+</P>
+<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<P>
+Putting an actual email address here is a spam magnet. If you want to email me,
+use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+</P>
+<br><a name="SEC5" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
new file mode 100644
index 0000000..6528a38
--- /dev/null
+++ b/doc/html/pcre2grep.html
@@ -0,0 +1,759 @@
+<html>
+<head>
+<title>pcre2grep specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2grep man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
+<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
+<li><a name="TOC5" href="#SEC5">OPTIONS</a>
+<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
+<li><a name="TOC7" href="#SEC7">NEWLINES</a>
+<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
+<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
+<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
+<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
+<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
+<li><a name="TOC13" href="#SEC13">AUTHOR</a>
+<li><a name="TOC14" href="#SEC14">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>pcre2grep [options] [long options] [pattern] [path1 path2 ...]</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+<b>pcre2grep</b> searches files for character patterns, in the same way as other
+grep commands do, but it uses the PCRE2 regular expression library to support
+patterns that are compatible with the regular expressions of Perl 5. See
+<a href="pcre2syntax.html"><b>pcre2syntax</b>(3)</a>
+for a quick-reference summary of pattern syntax, or
+<a href="pcre2pattern.html"><b>pcre2pattern</b>(3)</a>
+for a full description of the syntax and semantics of the regular expressions
+that PCRE2 supports.
+</P>
+<P>
+Patterns, whether supplied on the command line or in a separate file, are given
+without delimiters. For example:
+<pre>
+  pcre2grep Thursday /etc/motd
+</pre>
+If you attempt to use delimiters (for example, by surrounding a pattern with
+slashes, as is common in Perl scripts), they are interpreted as part of the
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed quotes are required if a
+pattern contains white space or shell metacharacters.
+</P>
+<P>
+The first argument that follows any option settings is treated as the single
+pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
+Conversely, when one or both of these options are used to specify patterns, all
+arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
+argument pattern must be provided.
+</P>
+<P>
+If no files are specified, <b>pcre2grep</b> reads the standard input. The
+standard input can also be referenced by a name consisting of a single hyphen.
+For example:
+<pre>
+  pcre2grep some-pattern /file1 - /file3
+</pre>
+By default, each line that matches a pattern is copied to the standard
+output, and if there is more than one file, the file name is output at the
+start of each line, followed by a colon. However, there are options that can
+change how <b>pcre2grep</b> behaves. In particular, the <b>-M</b> option makes it
+possible to search for patterns that span line boundaries. What defines a line
+boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
+</P>
+<P>
+The amount of memory used for buffering files that are being scanned is
+controlled by a parameter that can be set by the <b>--buffer-size</b> option.
+The default value for this parameter is specified when <b>pcre2grep</b> is built,
+with the default default being 20K. A block of memory three times this size is
+used (to allow for buffering "before" and "after" lines). An error occurs if a
+line overflows the buffer.
+</P>
+<P>
+Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
+(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
+each line in the order in which they are defined, except that all the <b>-e</b>
+patterns are tried before the <b>-f</b> patterns.
+</P>
+<P>
+By default, as soon as one pattern matches a line, no further patterns are
+considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
+matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
+<b>--line-offsets</b> is used to output only the part of the line that matched
+(either shown literally, or as an offset), scanning resumes immediately
+following the match, so that further matches on the same line can be found. If
+there are multiple patterns, they are all tried on the remainder of the line,
+but patterns that follow the one that matched are not tried on the earlier part
+of the line.
+</P>
+<P>
+This behaviour means that the order in which multiple patterns are specified
+can affect the output when one of the above options is used. This is no longer
+the same behaviour as GNU grep, which now manages to display earlier matches
+for later patterns (as long as there is no overlap).
+</P>
+<P>
+Patterns that can match an empty string are accepted, but empty string
+matches are never recognized. An example is the pattern "(super)?(man)?", in
+which all components are optional. This pattern finds all occurrences of both
+"super" and "man"; the output differs from matching with "super|man" when only
+the matching substrings are being shown.
+</P>
+<P>
+If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
+<b>pcre2grep</b> uses the value to set a locale when calling the PCRE2 library.
+The <b>--locale</b> option can be used to override this.
+</P>
+<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
+<P>
+It is possible to compile <b>pcre2grep</b> so that it uses <b>libz</b> or
+<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
+respectively. You can find out whether your binary has support for one or both
+of these file types by running it with the <b>--help</b> option. If the
+appropriate support is not present, files are treated as plain text. The
+standard input is always so treated.
+</P>
+<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
+<P>
+By default, a file that contains a binary zero byte within the first 1024 bytes
+is identified as a binary file, and is processed specially. (GNU grep also
+identifies binary files in this manner.) See the <b>--binary-files</b> option
+for a means of changing the way binary files are handled.
+</P>
+<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
+<P>
+The order in which some of the options appear can affect the output. For
+example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
+names. Whichever comes later in the command line will be the one that takes
+effect. Similarly, except where noted below, if an option is given twice, the
+later setting is used. Numerical values for options may be followed by K or M,
+to signify multiplication by 1024 or 1024*1024 respectively.
+</P>
+<P>
+<b>--</b>
+This terminates the list of options. It is useful if the next item on the
+command line starts with a hyphen but is not an option. This allows for the
+processing of patterns and filenames that start with hyphens.
+</P>
+<P>
+<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
+Output <i>number</i> lines of context after each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
+guarantees to have up to 8K of following text available for context output.
+</P>
+<P>
+<b>-a</b>, <b>--text</b>
+Treat binary files as text. This is equivalent to
+<b>--binary-files</b>=<i>text</i>.
+</P>
+<P>
+<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
+Output <i>number</i> lines of context before each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
+guarantees to have up to 8K of preceding text available for context output.
+</P>
+<P>
+<b>--binary-files=</b><i>word</i>
+Specify how binary files are to be processed. If the word is "binary" (the
+default), pattern matching is performed on binary files, but the only output is
+"Binary file &#60;name&#62; matches" when a match succeeds. If the word is "text",
+which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
+processed in the same way as any other file. In this case, when a match
+succeeds, the output may be binary garbage, which can have nasty effects if
+sent to a terminal. If the word is "without-match", which is equivalent to the
+<b>-I</b> option, binary files are not processed at all; they are assumed not to
+be of interest.
+</P>
+<P>
+<b>--buffer-size=</b><i>number</i>
+Set the parameter that controls how much memory is used for buffering files
+that are being scanned.
+</P>
+<P>
+<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
+Output <i>number</i> lines of context both before and after each matching line.
+This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
+</P>
+<P>
+<b>-c</b>, <b>--count</b>
+Do not output individual lines from the files that are being scanned; instead
+output the number of lines that would otherwise have been shown. If no lines
+are selected, the number zero is output. If several files are being
+scanned, a count is output for each of them. However, if the
+<b>--files-with-matches</b> option is also used, only those files whose counts
+are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
+<b>-B</b>, and <b>-C</b> options are ignored.
+</P>
+<P>
+<b>--colour</b>, <b>--color</b>
+If this option is given without any data, it is equivalent to "--colour=auto".
+If data is required, it must be given in the same shell item, separated by an
+equals sign.
+</P>
+<P>
+<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
+This option specifies under what circumstances the parts of a line that matched
+a pattern should be coloured in the output. By default, the output is not
+coloured. The value (which is optional, see above) may be "never", "always", or
+"auto". In the last case, colouring happens only if the standard output is
+connected to a terminal. More resources are used when colouring is enabled,
+because <b>pcre2grep</b> has to search for all possible matches in a line, not
+just one, in order to colour them all.
+<br>
+<br>
+The colour that is used can be specified by setting the environment variable
+PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
+</P>
+<P>
+<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
+If an input path is not a regular file or a directory, "action" specifies how
+it is to be processed. Valid values are "read" (the default) or "skip"
+(silently skip the path).
+</P>
+<P>
+<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
+If an input path is a directory, "action" specifies how it is to be processed.
+Valid values are "read" (the default in non-Windows environments, for
+compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
+"skip" (silently skip the path, the default in Windows environments). In the
+"read" case, directories are read as if they were ordinary files. In some
+operating systems the effect of reading a directory like this is an immediate
+end-of-file; in others it may provoke an error.
+</P>
+<P>
+<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is no limit to the number of patterns. They are applied to each
+line in the order in which they are defined until one matches.
+<br>
+<br>
+If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
+followed by the patterns from the file(s), independent of the order in which
+these options are specified. Note that multiple use of <b>-e</b> is not the same
+as a single pattern with alternatives. For example, X|Y finds the first
+character in a line that is X or Y, whereas if the two patterns are given
+separately, with X first, <b>pcre2grep</b> finds X if it is present, even if it
+follows Y in the line. It finds Y only if there is no X in the line. This
+matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
+of the line that matched.
+</P>
+<P>
+<b>--exclude</b>=<i>pattern</i>
+Files (but not directories) whose names match the pattern are skipped without
+being processed. This applies to all files, whether listed on the command line,
+obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
+PCRE2 regular expression, and is matched against the final component of the file
+name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
+apply to this pattern. The option may be given any number of times in order to
+specify multiple patterns. If a file name matches both an <b>--include</b>
+and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
+option.
+</P>
+<P>
+<b>--exclude-from=</b><i>filename</i>
+Treat each non-empty line of the file as the data for an <b>--exclude</b>
+option. What constitutes a newline when reading the file is the operating
+system's default. The <b>--newline</b> option has no effect on this option. This
+option may be given more than once in order to specify a number of files to
+read.
+</P>
+<P>
+<b>--exclude-dir</b>=<i>pattern</i>
+Directories whose names match the pattern are skipped without being processed,
+whatever the setting of the <b>--recursive</b> option. This applies to all
+directories, whether listed on the command line, obtained from
+<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE2
+regular expression, and is matched against the final component of the directory
+name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
+apply to this pattern. The option may be given any number of times in order to
+specify more than one pattern. If a directory matches both <b>--include-dir</b>
+and <b>--exclude-dir</b>, it is excluded. There is no short form for this
+option.
+</P>
+<P>
+<b>-F</b>, <b>--fixed-strings</b>
+Interpret each data-matching pattern as a list of fixed strings, separated by
+newlines, instead of as a regular expression. What constitutes a newline for
+this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
+as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
+They apply to each of the fixed strings. A line is selected if any of the fixed
+strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the <b>--include</b> or
+<b>--exclude</b> options.
+</P>
+<P>
+<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
+Read patterns from the file, one per line, and match them against
+each line of input. What constitutes a newline when reading the file is the
+operating system's default. The <b>--newline</b> option has no effect on this
+option. Trailing white space is removed from each line, and blank lines are
+ignored. An empty file contains no patterns and therefore matches nothing. See
+also the comments about multiple patterns versus a single pattern with
+alternatives in the description of <b>-e</b> above.
+<br>
+<br>
+If this option is given more than once, all the specified files are
+read. A data line is output if any of the patterns match it. A filename can
+be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
+specified on the command line using <b>-e</b> may also be present; they are
+tested before the file's patterns. However, no other pattern is taken from the
+command line; all arguments are treated as the names of paths to be searched.
+</P>
+<P>
+<b>--file-list</b>=<i>filename</i>
+Read a list of files and/or directories that are to be scanned from the given
+file, one per line. Trailing white space is removed from each line, and blank
+lines are ignored. These paths are processed before any that are listed on the
+command line. The filename can be given as "-" to refer to the standard input.
+If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
+read first. This is useful only when the standard input is a terminal, from
+which further lines (the list of files) can be read after an end-of-file
+indication. If this option is given more than once, all the specified files are
+read.
+</P>
+<P>
+<b>--file-offsets</b>
+Instead of showing lines or parts of lines that match, show each match as an
+offset from the start of the file and a length, separated by a comma. In this
+mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with <b>--line-offsets</b>
+and <b>--only-matching</b>.
+</P>
+<P>
+<b>-H</b>, <b>--with-filename</b>
+Force the inclusion of the filename at the start of output lines when searching
+a single file. By default, the filename is not shown in this case. For matching
+lines, the filename is followed by a colon; for context lines, a hyphen
+separator is used. If a line number is also being output, it follows the file
+name.
+</P>
+<P>
+<b>-h</b>, <b>--no-filename</b>
+Suppress the output of filenames when searching multiple files. By default,
+filenames are shown when multiple files are searched. For matching lines, the
+filename is followed by a colon; for context lines, a hyphen separator is used.
+If a line number is also being output, it follows the file name.
+</P>
+<P>
+<b>--help</b>
+Output a help message, giving brief details of the command options and file
+type support, and then exit. Anything else on the command line is
+ignored.
+</P>
+<P>
+<b>-I</b>
+Treat binary files as never matching. This is equivalent to
+<b>--binary-files</b>=<i>without-match</i>.
+</P>
+<P>
+<b>-i</b>, <b>--ignore-case</b>
+Ignore upper/lower case distinctions during comparisons.
+</P>
+<P>
+<b>--include</b>=<i>pattern</i>
+If any <b>--include</b> patterns are specified, the only files that are
+processed are those that match one of the patterns (and do not match an
+<b>--exclude</b> pattern). This option does not affect directories, but it
+applies to all files, whether listed on the command line, obtained from
+<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
+expression, and is matched against the final component of the file name, not
+the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
+this pattern. The option may be given any number of times. If a file name
+matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
+There is no short form for this option.
+</P>
+<P>
+<b>--include-from=</b><i>filename</i>
+Treat each non-empty line of the file as the data for an <b>--include</b>
+option. What constitutes a newline for this purpose is the operating system's
+default. The <b>--newline</b> option has no effect on this option. This option
+may be given any number of times; all the files are read.
+</P>
+<P>
+<b>--include-dir</b>=<i>pattern</i>
+If any <b>--include-dir</b> patterns are specified, the only directories that
+are processed are those that match one of the patterns (and do not match an
+<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+on the command line, obtained from <b>--file-list</b>, or by scanning a parent
+directory. The pattern is a PCRE2 regular expression, and is matched against the
+final component of the directory name, not the entire path. The <b>-F</b>,
+<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
+given any number of times. If a directory matches both <b>--include-dir</b> and
+<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
+</P>
+<P>
+<b>-L</b>, <b>--files-without-match</b>
+Instead of outputting lines from the files, just output the names of the files
+that do not contain any lines that would have been output. Each file name is
+output once, on a separate line.
+</P>
+<P>
+<b>-l</b>, <b>--files-with-matches</b>
+Instead of outputting lines from the files, just output the names of the files
+containing lines that would have been output. Each file name is output
+once, on a separate line. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches.
+</P>
+<P>
+<b>--label</b>=<i>name</i>
+This option supplies a name to be used for the standard input when file names
+are being output. If not supplied, "(standard input)" is used. There is no
+short form for this option.
+</P>
+<P>
+<b>--line-buffered</b>
+When this option is given, input is read and processed line by line, and the
+output is flushed after each write. By default, input is read in large chunks,
+unless <b>pcre2grep</b> can determine that it is reading from a terminal (which
+is currently possible only in Unix-like environments). Output to terminal is
+normally automatically flushed by the operating system. This option can be
+useful when the input or output is attached to a pipe and you do not want
+<b>pcre2grep</b> to buffer up large amounts of data. However, its use will affect
+performance, and the <b>-M</b> (multiline) option ceases to work.
+</P>
+<P>
+<b>--line-offsets</b>
+Instead of showing lines or parts of lines that match, show each match as a
+line number, the offset from the start of the line, and a length. The line
+number is terminated by a colon (as usual; see the <b>-n</b> option), and the
+offset and length are separated by a comma. In this mode, no context is shown.
+That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
+more than one match in a line, each of them is shown separately. This option is
+mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
+</P>
+<P>
+<b>--locale</b>=<i>locale-name</i>
+This option specifies a locale to be used for pattern matching. It overrides
+the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
+locale is specified, the PCRE2 library's default (usually the "C" locale) is
+used. There is no short form for this option.
+</P>
+<P>
+<b>--match-limit</b>=<i>number</i>
+Processing some regular expression patterns can require a very large amount of
+memory, leading in some cases to a program crash if not enough is available.
+Other patterns may take a very long time to search for all possible matching
+strings. The <b>pcre2_match()</b> function that is called by <b>pcre2grep</b> to do
+the matching has two parameters that can limit the resources that it uses.
+<br>
+<br>
+The <b>--match-limit</b> option provides a means of limiting resource usage
+when processing patterns that are not going to match, but which have a very
+large number of possibilities in their search trees. The classic example is a
+pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
+called <b>match()</b> which it calls repeatedly (sometimes recursively). The
+limit set by <b>--match-limit</b> is imposed on the number of times this
+function is called during a match, which has the effect of limiting the amount
+of backtracking that can take place.
+<br>
+<br>
+The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
+instead of limiting the total number of times that <b>match()</b> is called, it
+limits the depth of recursive calls, which in turn limits the amount of memory
+that can be used. The recursion depth is a smaller number than the total number
+of calls, because not all calls to <b>match()</b> are recursive. This limit is
+of use only if it is set smaller than <b>--match-limit</b>.
+<br>
+<br>
+There are no short forms for these options. The default settings are specified
+when the PCRE2 library is compiled, with the default default being 10 million.
+</P>
+<P>
+<b>-M</b>, <b>--multiline</b>
+Allow patterns to match more than one line. When this option is given, patterns
+may usefully contain literal newline characters and internal occurrences of ^
+and $ characters. The output for a successful match may consist of more than
+one line, the last of which is the one in which the match ended. If the matched
+string ends with a newline sequence the output ends at the end of that line.
+<br>
+<br>
+When this option is set, the PCRE2 library is called in "multiline" mode.
+There is a limit to the number of lines that can be matched, imposed by the way
+that <b>pcre2grep</b> buffers the input file as it scans it. However,
+<b>pcre2grep</b> ensures that at least 8K characters or the rest of the document
+(whichever is the shorter) are available for forward matching, and similarly
+the previous 8K characters (or all the previous characters, if fewer than 8K)
+are guaranteed to be available for lookbehind assertions. This option does not
+work when input is read line by line (see <b>--line-buffered</b>.)
+</P>
+<P>
+<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
+The PCRE2 library supports five different conventions for indicating
+the ends of lines. They are the single-character sequences CR (carriage return)
+and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
+which recognizes any of the preceding three types, and an "any" convention, in
+which any Unicode line ending sequence is assumed to end a line. The Unicode
+sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
+(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
+PS (paragraph separator, U+2029).
+<br>
+<br>
+When the PCRE2 library is built, a default line-ending sequence is specified.
+This is normally the standard sequence for the operating system. Unless
+otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
+The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
+makes it possible to use <b>pcre2grep</b> to scan files that have come from other
+environments without having to modify their line endings. If the data that is
+being scanned does not agree with the convention set by this option,
+<b>pcre2grep</b> may behave in strange ways. Note that this option does not
+apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
+<b>--include-from</b> options, which are expected to use the operating system's
+standard newline sequence.
+</P>
+<P>
+<b>-n</b>, <b>--line-number</b>
+Precede each output line by its line number in the file, followed by a colon
+for matching lines or a hyphen for context lines. If the filename is also being
+output, it precedes the line number. This option is forced if
+<b>--line-offsets</b> is used.
+</P>
+<P>
+<b>--no-jit</b>
+If the PCRE2 library is built with support for just-in-time compiling (which
+speeds up matching), <b>pcre2grep</b> automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+</P>
+<P>
+<b>-o</b>, <b>--only-matching</b>
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
+<b>-C</b> options are ignored. If there is more than one match in a line, each
+of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
+sense of the match to find non-matching lines), no output is generated, but the
+return code is set appropriately. If the matched portion of the line is empty,
+nothing is output unless the file name or line number are being printed, in
+which case they are shown on an otherwise empty line. This option is mutually
+exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
+</P>
+<P>
+<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 32 capturing parentheses are supported, and -o0 is
+equivalent to <b>-o</b> without a number. Because these options can be given
+without an argument (see above), if an argument is present, it must be given in
+the same shell item, for example, -o3 or --only-matching=2. The comments given
+for the non-argument case above also apply to this case. If the specified
+capturing parentheses do not exist in the pattern, or were not set in the
+match, nothing is output unless the file name or line number are being printed.
+<br>
+<br>
+If this option is given multiple times, multiple substrings are output, in the
+order the options are given. For example, -o3 -o1 -o3 causes the substrings
+matched by capturing parentheses 3 and 1 and then 3 again to be output. By
+default, there is no separator (but see the next option).
+</P>
+<P>
+<b>--om-separator</b>=<i>text</i>
+Specify a separating string for multiple occurrences of <b>-o</b>. The default
+is an empty string. Separating strings are never coloured.
+</P>
+<P>
+<b>-q</b>, <b>--quiet</b>
+Work quietly, that is, display nothing except error messages. The exit
+status indicates whether or not any matches were found.
+</P>
+<P>
+<b>-r</b>, <b>--recursive</b>
+If any given path is a directory, recursively scan the files it contains,
+taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
+directory is read as a normal file; in some operating systems this gives an
+immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
+option to "recurse".
+</P>
+<P>
+<b>--recursion-limit</b>=<i>number</i>
+See <b>--match-limit</b> above.
+</P>
+<P>
+<b>-s</b>, <b>--no-messages</b>
+Suppress error messages about non-existent or unreadable files. Such files are
+quietly skipped. However, the return code is still 2, even if matches were
+found in other files.
+</P>
+<P>
+<b>-u</b>, <b>--utf-8</b>
+Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
+with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
+<b>--include</b> options) and all subject lines that are scanned must be valid
+strings of UTF-8 characters.
+</P>
+<P>
+<b>-V</b>, <b>--version</b>
+Write the version numbers of <b>pcre2grep</b> and the PCRE2 library to the
+standard output and then exit. Anything else on the command line is
+ignored.
+</P>
+<P>
+<b>-v</b>, <b>--invert-match</b>
+Invert the sense of the match, so that lines which do <i>not</i> match any of
+the patterns are the ones that are found.
+</P>
+<P>
+<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
+Force the patterns to match only whole words. This is equivalent to having \b
+at the start and end of the pattern. This option applies only to the patterns
+that are matched against the contents of files; it does not apply to patterns
+specified by any of the <b>--include</b> or <b>--exclude</b> options.
+</P>
+<P>
+<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
+Force the patterns to be anchored (each must start matching at the beginning of
+a line) and in addition, require them to match entire lines. This is equivalent
+to having ^ and $ characters at the start and end of each alternative branch in
+every pattern. This option applies only to the patterns that are matched
+against the contents of files; it does not apply to patterns specified by any
+of the <b>--include</b> or <b>--exclude</b> options.
+</P>
+<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
+<P>
+The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
+order, for a locale. The first one that is set is used. This can be overridden
+by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
+(usually the "C" locale) is used.
+</P>
+<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
+<P>
+The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
+different newline conventions from the default. Any parts of the input files
+that are written to the standard output are copied identically, with whatever
+newline sequences they have in the input. However, the setting of this option
+does not affect the interpretation of files specified by the <b>-f</b>,
+<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
+the operating system's standard newline sequence, nor does it affect the way in
+which <b>pcre2grep</b> writes informational messages to the standard error and
+output streams. For these it uses the string "\n" to indicate newlines,
+relying on the C I/O library to convert this to an appropriate sequence.
+</P>
+<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
+<P>
+Many of the short and long forms of <b>pcre2grep</b>'s options are the same
+as in the GNU <b>grep</b> program. Any long option of the form
+<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
+(PCRE2 terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
+<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
+<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
+<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
+<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
+capturing parentheses number.
+</P>
+<P>
+Although most of the common options work the same way, a few are different in
+<b>pcre2grep</b>. For example, the <b>--include</b> option's argument is a glob
+for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
+<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
+without counts, but <b>pcre2grep</b> gives the counts.
+</P>
+<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
+<P>
+There are four different ways in which an option with data can be specified.
+If a short form option is used, the data may follow immediately, or (with one
+exception) in the next command line item. For example:
+<pre>
+  -f/some/file
+  -f /some/file
+</pre>
+The exception is the <b>-o</b> option, which may appear with or without data.
+Because of this, if data is present, it must follow immediately in the same
+item, for example -o3.
+</P>
+<P>
+If a long form option is used, the data may appear in the same command line
+item, separated by an equals character, or (with two exceptions) it may appear
+in the next command line item. For example:
+<pre>
+  --file=/some/file
+  --file /some/file
+</pre>
+Note, however, that if you want to supply a file name beginning with ~ as data
+in a shell command, and have the shell expand ~ to a home directory, you must
+separate the file name from the option, because the shell does not treat ~
+specially unless it is at the start of an item.
+</P>
+<P>
+The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
+<b>--only-matching</b> options, for which the data is optional. If one of these
+options does have data, it must be given in the first form, using an equals
+character. Otherwise <b>pcre2grep</b> will assume that it has no data.
+</P>
+<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
+<P>
+It is possible to supply a regular expression that takes a very long time to
+fail to match certain lines. Such patterns normally involve nested indefinite
+repeats, for example: (a+)*\d when matched against a line of a's with no final
+digit. The PCRE2 matching function has a resource limit that causes it to abort
+in these circumstances. If this happens, <b>pcre2grep</b> outputs an error
+message and the line that caused the problem to the standard error stream. If
+there are more than 20 such errors, <b>pcre2grep</b> gives up.
+</P>
+<P>
+The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the overall
+resource limit; there is a second option called <b>--recursion-limit</b> that
+sets a limit on the amount of memory (usually stack) that is used (see the
+discussion of these options above).
+</P>
+<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
+<P>
+Exit status is 0 if any matches were found, 1 if no matches were found, and 2
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+<b>-s</b> option to suppress error messages about inaccessible files does not
+affect the return code.
+</P>
+<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2test</b>(1).
+</P>
+<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC14" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/pcre2-config.1 b/doc/pcre2-config.1
new file mode 100644
index 0000000..7fa0a09
--- /dev/null
+++ b/doc/pcre2-config.1
@@ -0,0 +1,86 @@
+.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+pcre2-config - program to return PCRE2 configuration
+.SH SYNOPSIS
+.rs
+.sp
+.nf
+.B pcre2-config  [--prefix] [--exec-prefix] [--version]
+.B "             [--libs8] [--libs16] [--libs32] [--libs-posix]"
+.B "             [--cflags] [--cflags-posix]"
+.fi
+.
+.
+.SH DESCRIPTION
+.rs
+.sp
+\fBpcre2-config\fP returns the configuration of the installed PCRE2 libraries
+and the options required to compile a program to use them. Some of the options
+apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
+not available for libraries that have not been built. If an unavailable option
+is encountered, the "usage" information is output.
+.
+.
+.SH OPTIONS
+.rs
+.TP 10
+\fB--prefix\fP
+Writes the directory prefix used in the PCRE2 installation for architecture
+independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some
+systems) to the standard output.
+.TP 10
+\fB--exec-prefix\fP
+Writes the directory prefix used in the PCRE2 installation for architecture
+dependent files (normally the same as \fB--prefix\fP) to the standard output.
+.TP 10
+\fB--version\fP
+Writes the version number of the installed PCRE2 libraries to the standard
+output.
+.TP 10
+\fB--libs8\fP
+Writes to the standard output the command line options required to link
+with the 8-bit PCRE2 library (\fB-lpcre2-8\fP on many systems).
+.TP 10
+\fB--libs16\fP
+Writes to the standard output the command line options required to link
+with the 16-bit PCRE2 library (\fB-lpcre2-16\fP on many systems).
+.TP 10
+\fB--libs32\fP
+Writes to the standard output the command line options required to link
+with the 32-bit PCRE2 library (\fB-lpcre2-32\fP on many systems).
+.TP 10
+\fB--libs-posix\fP
+Writes to the standard output the command line options required to link with
+PCRE2's POSIX API wrapper library (\fB-lpcre2-posix\fP \fB-lpcre2-8\fP on many
+systems).
+.TP 10
+\fB--cflags\fP
+Writes to the standard output the command line options required to compile
+files that use PCRE2 (this may include some \fB-I\fP options, but is blank on
+many systems).
+.TP 10
+\fB--cflags-posix\fP
+Writes to the standard output the command line options required to compile
+files that use PCRE2's POSIX API wrapper library (this may include some
+\fB-I\fP options, but is blank on many systems).
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2(3)\fP
+.
+.
+.SH AUTHOR
+.rs
+.sp
+This manual page was originally written by Mark Baker for the Debian GNU/Linux
+system. It has been subsequently revised as a generic PCRE2 man page.
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+.fi
diff --git a/doc/pcre2-config.txt b/doc/pcre2-config.txt
new file mode 100644
index 0000000..8ddea2a
--- /dev/null
+++ b/doc/pcre2-config.txt
@@ -0,0 +1,81 @@
+PCRE2-CONFIG(1)             General Commands Manual            PCRE2-CONFIG(1)
+
+
+
+NAME
+       pcre2-config - program to return PCRE2 configuration
+
+SYNOPSIS
+
+       pcre2-config [--prefix] [--exec-prefix] [--version]
+                    [--libs8] [--libs16] [--libs32] [--libs-posix]
+                    [--cflags] [--cflags-posix]
+
+
+DESCRIPTION
+
+       pcre2-config returns the configuration of the installed PCRE2 libraries
+       and the options required to compile a program to use them. Some of  the
+       options  apply  only  to  the  8-bit,  or  16-bit, or 32-bit libraries,
+       respectively, and are not available for libraries that  have  not  been
+       built. If an unavailable option is encountered, the "usage" information
+       is output.
+
+
+OPTIONS
+
+       --prefix  Writes the directory prefix used in  the  PCRE2  installation
+                 for  architecture  independent  files  (/usr on many systems,
+                 /usr/local on some systems) to the standard output.
+
+       --exec-prefix
+                 Writes the directory prefix used in  the  PCRE2  installation
+                 for architecture dependent files (normally the same as --pre-
+                 fix) to the standard output.
+
+       --version Writes the version number of the installed PCRE2 libraries to
+                 the standard output.
+
+       --libs8   Writes  to  the  standard  output  the  command  line options
+                 required to link with the 8-bit PCRE2 library  (-lpcre2-8  on
+                 many systems).
+
+       --libs16  Writes  to  the  standard  output  the  command  line options
+                 required to link with the 16-bit PCRE2 library (-lpcre2-16 on
+                 many systems).
+
+       --libs32  Writes  to  the  standard  output  the  command  line options
+                 required to link with the 32-bit PCRE2 library (-lpcre2-32 on
+                 many systems).
+
+       --libs-posix
+                 Writes  to  the  standard  output  the  command  line options
+                 required to link  with  PCRE2's  POSIX  API  wrapper  library
+                 (-lpcre2-posix -lpcre2-8 on many systems).
+
+       --cflags  Writes  to  the  standard  output  the  command  line options
+                 required to compile files that use PCRE2  (this  may  include
+                 some -I options, but is blank on many systems).
+
+       --cflags-posix
+                 Writes  to  the  standard  output  the  command  line options
+                 required to compile files that use PCRE2's POSIX API  wrapper
+                 library  (this  may  include some -I options, but is blank on
+                 many systems).
+
+
+SEE ALSO
+
+       pcre2(3)
+
+
+AUTHOR
+
+       This manual page was originally written by Mark Baker  for  the  Debian
+       GNU/Linux  system.  It has been subsequently revised as a generic PCRE2
+       man page.
+
+
+REVISION
+
+       Last updated: 28 September 2014
diff --git a/doc/pcre2.3 b/doc/pcre2.3
new file mode 100644
index 0000000..aaa71d3
--- /dev/null
+++ b/doc/pcre2.3
@@ -0,0 +1,180 @@
+.TH PCRE2 3 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH INTRODUCTION
+.rs
+.sp
+PCRE2 is the name used for a revised API for the PCRE library, which is a set
+of functions, written in C, that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences. Some
+features that appeared in Python and the original PCRE before they appeared in
+Perl are also available using the Python syntax, there is some support for one
+or two .NET and Oniguruma syntax items, and there are options for requesting
+some minor changes that give better ECMAScript (aka JavaScript) compatibility.
+.P
+The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
+code units, which means that up to three separate libraries may be installed.
+The original work to extend PCRE to 16-bit and 32-bit code units was done by
+Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
+can be interpreted either as one character per code unit, or as UTF-encoded
+Unicode, with support for Unicode general category properties. Unicode is
+optional at build time, and must be enabled explicitly at run time. The version
+of Unicode in use can be discovered by running
+.sp
+  pcre2test -C
+.P
+The three libraries contain identical sets of functions, with names ending in 
+_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, 
+by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just 
+one code unit width can be written using generic names such as
+\fBpcre2_compile()\fP, and the documentation is written assuming that this is 
+the case.
+.P
+In addition to the Perl-compatible matching function, PCRE2 contains an
+alternative function that matches the same compiled patterns in a different
+way. In certain circumstances, the alternative function has some advantages.
+For a discussion of the two matching algorithms, see the
+.\" HREF
+\fBpcre2matching\fP
+.\"
+page.
+.P
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE2 are given in separate documents. See the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+and
+.\" HREF
+\fBpcre2compat\fP
+.\"
+pages. There is a syntax summary in the
+.\" HREF
+\fBpcre2syntax\fP
+.\"
+page.
+.P
+Some features of PCRE2 can be included, excluded, or changed when the library
+is built. The
+.\" HREF
+\fBpcre2_config()\fP
+.\"
+function makes it possible for a client to discover which features are
+available. The features themselves are described in the
+.\" HREF
+\fBpcre2build\fP
+.\"
+page. Documentation about building PCRE2 for various operating systems can be
+found in the
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+and
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS-BUILD\fP
+.\"
+files in the source distribution.
+.P
+The libraries contain a number of undocumented internal functions and data
+tables that are used by more than one of the exported external functions, but
+which are not intended for use by external callers. Their names all begin with
+"_pcre2", which hopefully will not provoke any name clashes. In some
+environments, it is possible to control which external symbols are exported
+when a shared library is built, and in these cases the undocumented symbols are
+not exported.
+.
+.
+.SH "SECURITY CONSIDERATIONS"
+.rs
+.sp
+If you are using PCRE2 in a non-UTF application that permits users to supply
+arbitrary patterns for compilation, you should be aware of a feature that
+allows users to turn on UTF support from within a pattern, provided that PCRE2
+was built with Unicode support. For example, an 8-bit pattern that begins with
+"(*UTF)" turns on UTF-8 mode, which interprets patterns and subjects as strings
+of UTF-8 code units instead of individual 8-bit characters. This causes both
+the pattern and any data against which it is matched to be checked for UTF-8
+validity. If the data string is very long, such a check might use sufficiently
+many resources as to cause your application to lose performance.
+.P
+One way of guarding against this possibility is to use the
+\fBpcre2_pattern_info()\fP function to check the compiled pattern's options for
+UTF. Alternatively, you can set the PCRE2_NEVER_UTF option at compile time.
+This causes a compile time error if a pattern contains a UTF-setting sequence.
+.P
+If your application is one that supports UTF, be aware that validity checking
+can take time. If the same data string is to be matched many times, you can use
+the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
+running redundant checks.
+.P
+Another way that performance can be hit is by running a pattern that has a very
+large search tree against a string that will never match. Nested unlimited
+repeats in a pattern are a common example. PCRE2 provides some protection
+against this: see the \fBpcre2_set_match_limit()\fP function in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page.
+.
+.
+.SH "USER DOCUMENTATION"
+.rs
+.sp
+The user documentation for PCRE2 comprises a number of different sections. In
+the "man" format, each of these is a separate "man page". In the HTML format,
+each is a separate page, linked from the index page. In the plain text format,
+the descriptions of the \fBpcre2grep\fP and \fBpcre2test\fP programs are in
+files called \fBpcre2grep.txt\fP and \fBpcre2test.txt\fP, respectively. The
+remaining sections, except for the \fBpcre2demo\fP section (which is a program
+listing), and the short pages for individual functions, are concatenated in
+\fBpcre2.txt\fP, for ease of searching. The sections are as follows:
+.sp
+  pcre2              this document FIXME CHECK THIS LIST
+  pcre2-config       show PCRE2 installation configuration information
+  pcre2api           details of PCRE2's native C API
+  pcre2build         building PCRE2
+  pcre2callout       details of the callout feature
+  pcre2compat        discussion of Perl compatibility
+  pcre2demo          a demonstration C program that uses PCRE2
+  pcre2grep          description of the \fBpcre2grep\fP command (8-bit only)
+  pcre2jit           discussion of the just-in-time optimization support
+  pcre2limits        details of size and other limits
+  pcre2matching      discussion of the two matching algorithms
+  pcre2partial       details of the partial matching facility
+.\" JOIN
+  pcre2pattern       syntax and semantics of supported
+                      regular expressions
+  pcre2perform       discussion of performance issues
+  pcre2posix         the POSIX-compatible C API for the 8-bit library
+  pcre2sample        discussion of the pcre2demo program
+  pcre2stack         discussion of stack usage
+  pcre2syntax        quick syntax reference
+  pcre2test          description of the \fBpcre2test\fP testing command
+  pcre2unicode       discussion of Unicode and UTF support
+.sp
+In the "man" and HTML formats, there is also a short page for each C library
+function, listing its arguments and results.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.P
+Putting an actual email address here is a spam magnet. If you want to email me,
+use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2build.3 b/doc/pcre2build.3
new file mode 100644
index 0000000..2146777
--- /dev/null
+++ b/doc/pcre2build.3
@@ -0,0 +1,490 @@
+.TH PCRE2BUILD 3 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.
+.
+.SH "BUILDING PCRE2"
+.rs
+.sp
+PCRE2 is distributed with a \fBconfigure\fP script that can be used to build
+the library in Unix-like environments using the applications known as
+Autotools. Also in the distribution are files to support building using
+\fBCMake\fP instead of \fBconfigure\fP. The text file
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+contains general information about building with Autotools (some of which is
+repeated below), and also has some comments about building on various operating
+systems. There is a lot more information about building PCRE2 without using
+Autotools (including information about using \fBCMake\fP and building "by
+hand") in the text file called
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS-BUILD\fP.
+.\"
+You should consult this file as well as the
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+file if you are building in a non-Unix-like environment.
+.
+.
+.SH "PCRE2 BUILD-TIME OPTIONS"
+.rs
+.sp
+The rest of this document describes the optional features of PCRE2 that can be
+selected when the library is compiled. It assumes use of the \fBconfigure\fP
+script, where the optional features are selected or deselected by providing
+options to \fBconfigure\fP before running the \fBmake\fP command. However, the
+same options can be selected in both Unix-like and non-Unix-like environments
+if you are using \fBCMake\fP instead of \fBconfigure\fP to build PCRE2.
+.P
+If you are not using Autotools or \fBCMake\fP, option selection can be done by
+editing the \fBconfig.h\fP file, or by passing parameter settings to the
+compiler, as described in
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS-BUILD\fP.
+.\"
+.P
+The complete list of options for \fBconfigure\fP (which includes the standard
+ones such as the selection of the installation directory) can be obtained by
+running
+.sp
+  ./configure --help
+.sp
+The following sections include descriptions of options whose names begin with
+--enable or --disable. These settings specify changes to the defaults for the
+\fBconfigure\fP command. Because of the way that \fBconfigure\fP works,
+--enable and --disable always come in pairs, so the complementary option always
+exists as well, but as it specifies the default, it is not described.
+.
+.
+.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
+.rs
+.sp
+By default, a library called \fBlibpcre2-8\fP is built, containing functions
+that take string arguments contained in vectors of bytes, interpreted either as
+single-byte characters, or UTF-8 strings. You can also build two other
+libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
+strings that are contained in vectors of 16-bit and 32-bit code units,
+respectively. These can be interpreted either as single-unit characters or
+UTF-16/UTF-32 strings. To build these additional libraries, add one or both of 
+the following to the \fBconfigure\fP command:
+.sp
+  --enable-pcre16
+  --enable-pcre32 
+.sp
+If you do not want the 8-bit library, add
+.sp
+  --disable-pcre8
+.sp
+as well. At least one of the three libraries must be built. Note that the POSIX
+wrapper is for the 8-bit library only, and that \fBpcre2grep\fP is an 8-bit
+program. Neither of these is built if you select only the 16-bit or 32-bit
+libraries.
+.
+.
+.SH "BUILDING SHARED AND STATIC LIBRARIES"
+.rs
+.sp
+The Autotools PCRE2 building process uses \fBlibtool\fP to build both shared
+and static libraries by default. You can suppress one of these by adding one of
+.sp
+  --disable-shared
+  --disable-static
+.sp
+to the \fBconfigure\fP command, as required.
+.
+.
+.SH "Unicode and UTF SUPPORT"
+.rs
+.sp
+To build PCRE2 with support for Unicode and UTF character strings, add
+.sp
+  --enable-unicode
+.sp
+to the \fBconfigure\fP command. This setting applies to all three libraries,
+adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
+library, and support for UTF-32 to the 32-bit library.
+It is not possible to build one library with
+UTF support and another without in the same configuration.
+.P
+Of itself, this setting does not make PCRE2 treat strings as UTF-8, UTF-16 or
+UTF-32. As well as compiling PCRE2 with this option, you also have to set
+the PCRE2_UTF option when you call \fBpcre2_compile()\fP to compile a pattern.
+.P
+If you set --enable-unicode when compiling in an EBCDIC environment, PCRE2
+expects its input to be either ASCII or UTF-8 (depending on the run-time
+option). It is not possible to support both EBCDIC and UTF-8 codes in the same
+version of the library. Consequently, --enable-unicode and --enable-ebcdic are
+mutually exclusive.
+.P
+UTF support allows the libraries to process character codepoints up to 0x10ffff
+in the strings that they handle. It also provides support for accessing the
+properties of such characters, using pattern escapes such as \eP, \ep, and \eX.
+Only the general category properties such as \fILu\fP and \fINd\fP are
+supported. Details are given in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+documentation.
+.
+.
+.SH "JUST-IN-TIME COMPILER SUPPORT"
+.rs
+.sp
+Just-in-time compiler support is included in the build by specifying
+.sp
+  --enable-jit
+.sp
+This support is available only for certain hardware architectures. If this
+option is set for an unsupported architecture, a compile time error occurs.
+See the
+.\" HREF
+\fBpcre2jit\fP
+.\"
+documentation for a discussion of JIT usage. When JIT support is enabled,
+pcre2grep automatically makes use of it, unless you add
+.sp
+  --disable-pcre2grep-jit
+.sp
+to the \fBconfigure\fP command.
+.
+.
+.SH "CODE VALUE OF NEWLINE"
+.rs
+.sp
+By default, PCRE2 interprets the linefeed (LF) character as indicating the end
+of a line. This is the normal newline character on Unix-like systems. You can
+compile PCRE2 to use carriage return (CR) instead, by adding
+.sp
+  --enable-newline-is-cr
+.sp
+to the \fBconfigure\fP command. There is also a --enable-newline-is-lf option,
+which explicitly specifies linefeed as the newline character.
+.sp
+Alternatively, you can specify that line endings are to be indicated by the two
+character sequence CRLF. If you want this, add
+.sp
+  --enable-newline-is-crlf
+.sp
+to the \fBconfigure\fP command. There is a fourth option, specified by
+.sp
+  --enable-newline-is-anycrlf
+.sp
+which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as
+indicating a line ending. Finally, a fifth option, specified by
+.sp
+  --enable-newline-is-any
+.sp
+causes PCRE2 to recognize any Unicode newline sequence.
+.P
+Whatever line ending convention is selected when PCRE2 is built can be
+overridden when the library functions are called. At build time it is
+conventional to use the standard for your operating system.
+.
+.
+.SH "WHAT \eR MATCHES"
+.rs
+.sp
+By default, the sequence \eR in a pattern matches any Unicode newline sequence,
+whatever has been selected as the line ending sequence. If you specify
+.sp
+  --enable-bsr-anycrlf
+.sp
+the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is
+selected when PCRE2 is built can be overridden when the library functions are
+called.
+.
+.
+.SH "HANDLING VERY LARGE PATTERNS"
+.rs
+.sp
+Within a compiled pattern, offset values are used to point from one part to
+another (for example, from an opening parenthesis to an alternation
+metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
+are used for these offsets, leading to a maximum size for a compiled pattern of
+around 64K. This is sufficient to handle all but the most gigantic patterns.
+Nevertheless, some people do want to process truly enormous patterns, so it is
+possible to compile PCRE2 to use three-byte or four-byte offsets by adding a
+setting such as
+.sp
+  --with-link-size=3
+.sp
+to the \fBconfigure\fP command. The value given must be 2, 3, or 4. For the
+16-bit library, a value of 3 is rounded up to 4. In these libraries, using
+longer offsets slows down the operation of PCRE2 because it has to load
+additional data when handling them. For the 32-bit library the value is always
+4 and cannot be overridden; the value of --with-link-size is ignored.
+.
+.
+.SH "AVOIDING EXCESSIVE STACK USAGE"
+.rs
+.sp
+When matching with the \fBpcre2_match()\fP function, PCRE2 implements
+backtracking by making recursive calls to an internal function called
+\fBmatch()\fP. In environments where the size of the stack is limited, this can
+severely limit PCRE2's operation. (The Unix environment does not usually suffer
+from this problem, but it may sometimes be necessary to increase the maximum
+stack size. There is a discussion in the
+.\" HREF
+\fBpcre2stack\fP
+.\"
+documentation.) An alternative approach to recursion that uses memory from the
+heap to remember data, instead of using recursive function calls, has been
+implemented to work round the problem of limited stack size. If you want to
+build a version of PCRE2 that works this way, add
+.sp
+  --disable-stack-for-recursion
+.sp
+to the \fBconfigure\fP command. By default, the system functions \fBmalloc()\fP
+and \fBfree()\fP are called to manage the heap memory that is required, but
+custom memory management functions can be called instead. PCRE2 runs noticeably
+more slowly when built in this way. This option affects only the
+\fBpcre2_match()\fP function; it is not relevant for \fBpcre2_dfa_match()\fP.
+.
+.
+.SH "LIMITING PCRE2 RESOURCE USAGE"
+.rs
+.sp
+Internally, PCRE2 has a function called \fBmatch()\fP, which it calls
+repeatedly (sometimes recursively) when matching a pattern with the
+\fBpcre2_match()\fP function. By controlling the maximum number of times this
+function may be called during a single matching operation, a limit can be
+placed on the resources used by a single call to \fBpcre2_match()\fP. The limit
+can be changed at run time, as described in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation. The default is 10 million, but this can be changed by adding a
+setting such as
+.sp
+  --with-match-limit=500000
+.sp
+to the \fBconfigure\fP command. This setting has no effect on the
+\fBpcre2_dfa_match()\fP matching function.
+.P
+In some environments it is desirable to limit the depth of recursive calls of
+\fBmatch()\fP more strictly than the total number of calls, in order to
+restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
+is specified) that is used. A second limit controls this; it defaults to the
+value that is set for --with-match-limit, which imposes no additional
+constraints. However, you can set a lower limit by adding, for example,
+.sp
+  --with-match-limit-recursion=10000
+.sp
+to the \fBconfigure\fP command. This value can also be overridden at run time.
+.
+.
+.SH "CREATING CHARACTER TABLES AT BUILD TIME"
+.rs
+.sp
+PCRE2 uses fixed tables for processing characters whose code points are less
+than 256. By default, PCRE2 is built with a set of tables that are distributed
+in the file \fIsrc/pcre2_chartables.c.dist\fP. These tables are for ASCII codes
+only. If you add
+.sp
+  --enable-rebuild-chartables
+.sp
+to the \fBconfigure\fP command, the distributed tables are no longer used.
+Instead, a program called \fBdftables\fP is compiled and run. This outputs the
+source for a new set of tables, created in the default locale of your C run-time
+system. (This method of replacing the tables does not work if you are cross
+compiling, because \fBdftables\fP is run on the local host. If you need to
+create alternative tables when cross compiling, you will have to do so "by
+hand".)
+.
+.
+.SH "USING EBCDIC CODE"
+.rs
+.sp
+PCRE2 assumes by default that it will run in an environment where the character
+code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
+most computer operating systems. PCRE2 can, however, be compiled to run in an
+EBCDIC environment by adding
+.sp
+  --enable-ebcdic
+.sp
+to the \fBconfigure\fP command. This setting implies
+--enable-rebuild-chartables. You should only use it if you know that you are in
+an EBCDIC environment (for example, an IBM mainframe operating system). The
+--enable-ebcdic option is incompatible with --enable-unicode.
+.P
+The EBCDIC character that corresponds to an ASCII LF is assumed to have the
+value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
+such an environment you should use
+.sp
+  --enable-ebcdic-nl25
+.sp
+as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
+same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is \fInot\fP
+chosen as LF is made to correspond to the Unicode NEL character (which, in
+Unicode, is 0x85).
+.P
+The options that select newline behaviour, such as --enable-newline-is-cr,
+and equivalent run-time options, refer to these character values in an EBCDIC
+environment.
+.
+.
+.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT"
+.rs
+.sp
+By default, \fBpcre2grep\fP reads all files as plain text. You can build it so
+that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads
+them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of
+.sp
+  --enable-pcre2grep-libz
+  --enable-pcre2grep-libbz2
+.sp
+to the \fBconfigure\fP command. These options naturally require that the
+relevant libraries are installed on your system. Configuration will fail if
+they are not.
+.
+.
+.SH "PCRE2GREP BUFFER SIZE"
+.rs
+.sp
+\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
+scanning, in order to be able to output "before" and "after" lines when it
+finds a match. The size of the buffer is controlled by a parameter whose
+default value is 20K. The buffer itself is three times this size, but because
+of the way it is used for holding "before" lines, the longest line that is
+guaranteed to be processable is the parameter size. You can change the default
+parameter value by adding, for example,
+.sp
+  --with-pcre2grep-bufsize=50K
+.sp
+to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can, however,
+override this value by specifying a run-time option.
+.
+.
+.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
+.rs
+.sp
+If you add one of
+.sp
+  --enable-pcre2test-libreadline
+  --enable-pcre2test-libedit 
+.sp
+to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
+\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
+from a terminal, it reads it using the \fBreadline()\fP function. This provides
+line-editing and history facilities. Note that \fBlibreadline\fP is
+GPL-licensed, so if you distribute a binary of \fBpcre2test\fP linked in this
+way, there may be licensing issues. These can be avoided by linking with
+\fBlibedit\fP (which has a BSD licence) instead.
+.P
+Setting this option causes the \fB-lreadline\fP option to be added to the
+\fBpcre2test\fP build. In many operating environments with a system-installed
+readline library this is sufficient. However, in some environments (e.g. if an
+unmodified distribution version of readline is in use), some extra
+configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
+this:
+.sp
+  "Readline uses the termcap functions, but does not link with 
+  the termcap or curses library itself, allowing applications 
+  which link with readline the to choose an appropriate library."
+.sp
+If your environment has not been set up so that an appropriate library is
+automatically included, you may need to add something like
+.sp
+  LIBS="-lncurses"
+.sp
+immediately before the \fBconfigure\fP command.
+.
+.
+.SH "DEBUGGING WITH VALGRIND SUPPORT"
+.rs
+.sp
+By adding the
+.sp
+  --enable-valgrind
+.sp
+option to the \fBconfigure\fP command, PCRE2 will use valgrind annotations
+to mark certain memory regions as unaddressable. This allows it to detect
+invalid memory accesses, and is mostly useful for debugging PCRE2 itself.
+.
+.
+.SH "CODE COVERAGE REPORTING"
+.rs
+.sp
+If your C compiler is gcc, you can build a version of PCRE2 that can generate a
+code coverage report for its test suite. To enable this, you must install
+\fBlcov\fP version 1.6 or above. Then specify
+.sp
+  --enable-coverage
+.sp
+to the \fBconfigure\fP command and build PCRE2 in the usual way.
+.P
+Note that using \fBccache\fP (a caching C compiler) is incompatible with code
+coverage reporting. If you have configured \fBccache\fP to run automatically
+on your system, you must set the environment variable
+.sp
+  CCACHE_DISABLE=1
+.sp
+before running \fBmake\fP to build PCRE2, so that \fBccache\fP is not used.
+.P
+When --enable-coverage is used, the following additional targets are added to the
+\fIMakefile\fP:
+.sp
+  make coverage
+.sp
+This creates a fresh coverage report for the PCRE2 test suite. It is equivalent
+to running "make coverage-reset", "make coverage-baseline", "make check", and
+then "make coverage-report".
+.sp
+  make coverage-reset
+.sp
+This zeroes the coverage counters, but does nothing else.
+.sp
+  make coverage-baseline
+.sp
+This captures baseline coverage information.
+.sp
+  make coverage-report
+.sp
+This creates the coverage report.
+.sp
+  make coverage-clean-report
+.sp
+This removes the generated coverage report without cleaning the coverage data
+itself.
+.sp
+  make coverage-clean-data
+.sp
+This removes the captured coverage data without removing the coverage files
+created at compile time (*.gcno).
+.sp
+  make coverage-clean
+.sp
+This cleans all coverage data including the generated coverage report. For more
+information about code coverage, see the \fBgcov\fP and \fBlcov\fP
+documentation.
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2api\fP(3), \fBpcre2_config\fP(3).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2compat.3 b/doc/pcre2compat.3
new file mode 100644
index 0000000..d40742d
--- /dev/null
+++ b/doc/pcre2compat.3
@@ -0,0 +1,190 @@
+.TH PCRE2COMPAT 3 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
+.rs
+.sp
+This document describes the differences in the ways that PCRE2 and Perl handle
+regular expressions. The differences described here are with respect to Perl
+versions 5.10 and above.
+.P
+1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+have are given in the
+.\" HREF
+\fBpcre2unicode\fP
+.\"
+page.
+.P
+2. PCRE2 allows repeat quantifiers only on parenthesized assertions, but they
+do not mean what you might think. For example, (?!a){3} does not assert that
+the next three characters are not "a". It just asserts that the next character
+is not "a" three times (in principle: PCRE2 optimizes this to run the assertion
+just once). Perl allows repeat quantifiers on other assertions such as \eb, but
+these do not seem to have any use.
+.P
+3. Capturing subpatterns that occur inside negative lookahead assertions are
+counted, but their entries in the offsets vector are never set. Perl sometimes
+(but not always) sets its numerical variables from inside negative assertions.
+.P
+4. The following Perl escape sequences are not supported: \el, \eu, \eL,
+\eU, and \eN when followed by a character name or Unicode value. (\eN on its
+own, matching a non-newline character, is supported.) In fact these are
+implemented by Perl's general string-handling and are not part of its pattern
+matching engine. If any of these are encountered by PCRE2, an error is
+generated by default. However, if the PCRE2_ALT_BSUX option is set,
+\eU and \eu are interpreted as ECMAScript interprets them.
+.P
+5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
+built with Unicode support. The properties that can be tested with \ep and \eP
+are limited to the general category properties such as Lu and Nd, script names
+such as Greek or Han, and the derived properties Any and L&. PCRE2 does support
+the Cs (surrogate) property, which Perl does not; the Perl documentation says
+"Because Perl hides the need for the user to understand the internal
+representation of Unicode characters, there is no need to implement the
+somewhat messy concept of surrogates."
+.P
+6. PCRE2 does support the \eQ...\eE escape for quoting substrings. Characters
+in between are treated as literals. This is slightly different from Perl in
+that $ and @ are also handled as literals inside the quotes. In Perl, they
+cause variable interpolation (but of course PCRE2 does not have variables).
+Note the following examples:
+.sp
+    Pattern            PCRE2 matches      Perl matches
+.sp
+.\" JOIN
+    \eQabc$xyz\eE        abc$xyz           abc followed by the
+                                           contents of $xyz
+    \eQabc\e$xyz\eE       abc\e$xyz          abc\e$xyz
+    \eQabc\eE\e$\eQxyz\eE   abc$xyz           abc$xyz
+.sp
+The \eQ...\eE sequence is recognized both inside and outside character classes.
+.P
+7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+constructions. However, there is support for recursive patterns. This is not
+available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE2 "callout"
+feature allows an external function to be called during pattern matching. See
+the
+.\" HREF
+\fBpcre2callout\fP
+.\"
+documentation for details.
+.P
+8. Subpatterns that are called as subroutines (whether or not recursively) are
+always treated as atomic groups in PCRE2. This is like Python, but unlike Perl.
+Captured values that are set outside a subroutine call can be referenced from
+inside in PCRE2, but not in Perl. There is a discussion that explains these
+differences in more detail in the
+.\" HTML <a href="pcre2pattern.html#recursiondifference">
+.\" </a>
+section on recursion differences from Perl
+.\"
+in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+page.
+.P
+9. If any of the backtracking control verbs are used in a subpattern that is
+called as a subroutine (whether or not recursively), their effect is confined
+to that subpattern; it does not extend to the surrounding pattern. This is not
+always the case in Perl. In particular, if (*THEN) is present in a group that
+is called as a subroutine, its action is limited to that group, even if the
+group does not contain any | characters. Note that such subpatterns are
+processed as anchored at the point where they are tested.
+.P
+10. If a pattern contains more than one backtracking control verb, the first
+one that is backtracked onto acts. For example, in the pattern
+A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
+triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
+same as PCRE2, but there are examples where it differs.
+.P
+11. Most backtracking verbs in assertions have their normal actions. They are
+not confined to the assertion.
+.P
+12. There are some differences that are concerned with the settings of captured
+strings when part of a pattern is repeated. For example, matching "aba" against
+the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
+"b".
+.P
+13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
+names is not as general as Perl's. This is a consequence of the fact that PCRE2
+works internally just with numbers, using an external table to translate
+between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
+where the two capturing parentheses have the same number but different names,
+is not supported, and causes an error at compile time. If it were allowed, it
+would not be possible to distinguish which parentheses matched, because both
+names map to capturing subpattern number 1. To avoid this confusing situation,
+an error is given at compile time.
+.P
+14. Perl recognizes comments in some places that PCRE2 does not, for example,
+between the ( and ? at the start of a subpattern. If the /x modifier is set,
+Perl allows white space between ( and ? (though current Perls warn that this is
+deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
+.P
+15. Perl, when in warning mode, gives warnings for character classes such as
+[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
+warning features, so it gives an error in these cases because they are almost
+certainly user mistakes.
+.P
+16. In PCRE2, the upper/lower case character properties Lu and Ll are not
+affected when case-independent matching is specified. For example, \ep{Lu}
+always matches an upper case letter. I think Perl has changed in this respect;
+in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
+letters, regardless of case, when case independence is specified.
+.P
+17. PCRE2 provides some extensions to the Perl regular expression facilities.
+Perl 5.10 includes new features that are not in earlier versions of Perl, some
+of which (such as named parentheses) have been in PCRE2 for some time. This
+list is with respect to Perl 5.10:
+.sp
+(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
+each alternative branch of a lookbehind assertion can match a different length
+of string. Perl requires them all to have the same length.
+.sp
+(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
+meta-character matches only at the very end of the string.
+.sp
+(c) A backslash followed by a letter with no special meaning is faulted. (Perl
+can be made to issue a warning.)
+.sp
+(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
+inverted, that is, by default they are not greedy, but if followed by a
+question mark they are.
+.sp
+(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
+only at the first matching position in the subject string.
+.sp
+(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
+PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
+.sp
+(g) The \eR escape sequence can be restricted to match only CR, LF, or CRLF
+by the PCRE2_BSR_ANYCRLF option.
+.sp
+(h) The callout facility is PCRE2-specific.
+.sp
+(i) The partial matching facility is PCRE2-specific.
+.sp
+(j) The alternative matching function (\fBpcre2_dfa_match()\fP) matches in a
+different way and is not Perl-compatible.
+.sp
+(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
+a pattern that set overall options that cannot be changed within the pattern.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
new file mode 100644
index 0000000..3d6f22b
--- /dev/null
+++ b/doc/pcre2grep.1
@@ -0,0 +1,683 @@
+.TH PCRE2GREP 1 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+pcre2grep - a grep with Perl-compatible regular expressions.
+.SH SYNOPSIS
+.B pcre2grep [options] [long options] [pattern] [path1 path2 ...]
+.
+.SH DESCRIPTION
+.rs
+.sp
+\fBpcre2grep\fP searches files for character patterns, in the same way as other
+grep commands do, but it uses the PCRE2 regular expression library to support
+patterns that are compatible with the regular expressions of Perl 5. See
+.\" HREF
+\fBpcre2syntax\fP(3)
+.\"
+for a quick-reference summary of pattern syntax, or
+.\" HREF
+\fBpcre2pattern\fP(3)
+.\"
+for a full description of the syntax and semantics of the regular expressions
+that PCRE2 supports.
+.P
+Patterns, whether supplied on the command line or in a separate file, are given
+without delimiters. For example:
+.sp
+  pcre2grep Thursday /etc/motd
+.sp
+If you attempt to use delimiters (for example, by surrounding a pattern with
+slashes, as is common in Perl scripts), they are interpreted as part of the
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed quotes are required if a
+pattern contains white space or shell metacharacters.
+.P
+The first argument that follows any option settings is treated as the single
+pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present.
+Conversely, when one or both of these options are used to specify patterns, all
+arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an
+argument pattern must be provided.
+.P
+If no files are specified, \fBpcre2grep\fP reads the standard input. The
+standard input can also be referenced by a name consisting of a single hyphen.
+For example:
+.sp
+  pcre2grep some-pattern /file1 - /file3
+.sp
+By default, each line that matches a pattern is copied to the standard
+output, and if there is more than one file, the file name is output at the
+start of each line, followed by a colon. However, there are options that can
+change how \fBpcre2grep\fP behaves. In particular, the \fB-M\fP option makes it
+possible to search for patterns that span line boundaries. What defines a line
+boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
+.P
+The amount of memory used for buffering files that are being scanned is
+controlled by a parameter that can be set by the \fB--buffer-size\fP option.
+The default value for this parameter is specified when \fBpcre2grep\fP is built,
+with the default default being 20K. A block of memory three times this size is
+used (to allow for buffering "before" and "after" lines). An error occurs if a
+line overflows the buffer.
+.P
+Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
+(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
+each line in the order in which they are defined, except that all the \fB-e\fP
+patterns are tried before the \fB-f\fP patterns.
+.P
+By default, as soon as one pattern matches a line, no further patterns are
+considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the
+matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
+\fB--line-offsets\fP is used to output only the part of the line that matched
+(either shown literally, or as an offset), scanning resumes immediately
+following the match, so that further matches on the same line can be found. If
+there are multiple patterns, they are all tried on the remainder of the line,
+but patterns that follow the one that matched are not tried on the earlier part
+of the line.
+.P
+This behaviour means that the order in which multiple patterns are specified
+can affect the output when one of the above options is used. This is no longer
+the same behaviour as GNU grep, which now manages to display earlier matches
+for later patterns (as long as there is no overlap).
+.P
+Patterns that can match an empty string are accepted, but empty string
+matches are never recognized. An example is the pattern "(super)?(man)?", in
+which all components are optional. This pattern finds all occurrences of both
+"super" and "man"; the output differs from matching with "super|man" when only
+the matching substrings are being shown.
+.P
+If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set,
+\fBpcre2grep\fP uses the value to set a locale when calling the PCRE2 library.
+The \fB--locale\fP option can be used to override this.
+.
+.
+.SH "SUPPORT FOR COMPRESSED FILES"
+.rs
+.sp
+It is possible to compile \fBpcre2grep\fP so that it uses \fBlibz\fP or
+\fBlibbz2\fP to read files whose names end in \fB.gz\fP or \fB.bz2\fP,
+respectively. You can find out whether your binary has support for one or both
+of these file types by running it with the \fB--help\fP option. If the
+appropriate support is not present, files are treated as plain text. The
+standard input is always so treated.
+.
+.
+.SH "BINARY FILES"
+.rs
+.sp
+By default, a file that contains a binary zero byte within the first 1024 bytes
+is identified as a binary file, and is processed specially. (GNU grep also
+identifies binary files in this manner.) See the \fB--binary-files\fP option
+for a means of changing the way binary files are handled.
+.
+.
+.SH OPTIONS
+.rs
+.sp
+The order in which some of the options appear can affect the output. For
+example, both the \fB-h\fP and \fB-l\fP options affect the printing of file
+names. Whichever comes later in the command line will be the one that takes
+effect. Similarly, except where noted below, if an option is given twice, the
+later setting is used. Numerical values for options may be followed by K or M,
+to signify multiplication by 1024 or 1024*1024 respectively.
+.TP 10
+\fB--\fP
+This terminates the list of options. It is useful if the next item on the
+command line starts with a hyphen but is not an option. This allows for the
+processing of patterns and filenames that start with hyphens.
+.TP
+\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
+Output \fInumber\fP lines of context after each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
+guarantees to have up to 8K of following text available for context output.
+.TP
+\fB-a\fP, \fB--text\fP
+Treat binary files as text. This is equivalent to
+\fB--binary-files\fP=\fItext\fP.
+.TP
+\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
+Output \fInumber\fP lines of context before each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
+guarantees to have up to 8K of preceding text available for context output.
+.TP
+\fB--binary-files=\fP\fIword\fP
+Specify how binary files are to be processed. If the word is "binary" (the
+default), pattern matching is performed on binary files, but the only output is
+"Binary file <name> matches" when a match succeeds. If the word is "text",
+which is equivalent to the \fB-a\fP or \fB--text\fP option, binary files are
+processed in the same way as any other file. In this case, when a match
+succeeds, the output may be binary garbage, which can have nasty effects if
+sent to a terminal. If the word is "without-match", which is equivalent to the
+\fB-I\fP option, binary files are not processed at all; they are assumed not to
+be of interest.
+.TP
+\fB--buffer-size=\fP\fInumber\fP
+Set the parameter that controls how much memory is used for buffering files
+that are being scanned.
+.TP
+\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
+Output \fInumber\fP lines of context both before and after each matching line.
+This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value.
+.TP
+\fB-c\fP, \fB--count\fP
+Do not output individual lines from the files that are being scanned; instead
+output the number of lines that would otherwise have been shown. If no lines
+are selected, the number zero is output. If several files are being
+scanned, a count is output for each of them. However, if the
+\fB--files-with-matches\fP option is also used, only those files whose counts
+are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
+\fB-B\fP, and \fB-C\fP options are ignored.
+.TP
+\fB--colour\fP, \fB--color\fP
+If this option is given without any data, it is equivalent to "--colour=auto".
+If data is required, it must be given in the same shell item, separated by an
+equals sign.
+.TP
+\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP
+This option specifies under what circumstances the parts of a line that matched
+a pattern should be coloured in the output. By default, the output is not
+coloured. The value (which is optional, see above) may be "never", "always", or
+"auto". In the latter case, colouring happens only if the standard output is
+connected to a terminal. More resources are used when colouring is enabled,
+because \fBpcre2grep\fP has to search for all possible matches in a line, not
+just one, in order to colour them all.
+.sp
+The colour that is used can be specified by setting the environment variable
+PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
+.TP
+\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
+If an input path is not a regular file or a directory, "action" specifies how
+it is to be processed. Valid values are "read" (the default) or "skip"
+(silently skip the path).
+.TP
+\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP
+If an input path is a directory, "action" specifies how it is to be processed.
+Valid values are "read" (the default in non-Windows environments, for
+compatibility with GNU grep), "recurse" (equivalent to the \fB-r\fP option), or
+"skip" (silently skip the path, the default in Windows environments). In the
+"read" case, directories are read as if they were ordinary files. In some
+operating systems the effect of reading a directory like this is an immediate
+end-of-file; in others it may provoke an error.
+.TP
+\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is no limit to the number of patterns. They are applied to each
+line in the order in which they are defined until one matches.
+.sp
+If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first,
+followed by the patterns from the file(s), independent of the order in which
+these options are specified. Note that multiple use of \fB-e\fP is not the same
+as a single pattern with alternatives. For example, X|Y finds the first
+character in a line that is X or Y, whereas if the two patterns are given
+separately, with X first, \fBpcre2grep\fP finds X if it is present, even if it
+follows Y in the line. It finds Y only if there is no X in the line. This
+matters only if you are using \fB-o\fP or \fB--colo(u)r\fP to show the part(s)
+of the line that matched.
+.TP
+\fB--exclude\fP=\fIpattern\fP
+Files (but not directories) whose names match the pattern are skipped without
+being processed. This applies to all files, whether listed on the command line,
+obtained from \fB--file-list\fP, or by scanning a directory. The pattern is a
+PCRE2 regular expression, and is matched against the final component of the file
+name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not
+apply to this pattern. The option may be given any number of times in order to
+specify multiple patterns. If a file name matches both an \fB--include\fP
+and an \fB--exclude\fP pattern, it is excluded. There is no short form for this
+option.
+.TP
+\fB--exclude-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--exclude\fP
+option. What constitutes a newline when reading the file is the operating
+system's default. The \fB--newline\fP option has no effect on this option. This
+option may be given more than once in order to specify a number of files to
+read.
+.TP
+\fB--exclude-dir\fP=\fIpattern\fP
+Directories whose names match the pattern are skipped without being processed,
+whatever the setting of the \fB--recursive\fP option. This applies to all
+directories, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a parent directory. The pattern is a PCRE2
+regular expression, and is matched against the final component of the directory
+name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not
+apply to this pattern. The option may be given any number of times in order to
+specify more than one pattern. If a directory matches both \fB--include-dir\fP
+and \fB--exclude-dir\fP, it is excluded. There is no short form for this
+option.
+.TP
+\fB-F\fP, \fB--fixed-strings\fP
+Interpret each data-matching pattern as a list of fixed strings, separated by
+newlines, instead of as a regular expression. What constitutes a newline for
+this purpose is controlled by the \fB--newline\fP option. The \fB-w\fP (match
+as a word) and \fB-x\fP (match whole line) options can be used with \fB-F\fP.
+They apply to each of the fixed strings. A line is selected if any of the fixed
+strings are found in it (subject to \fB-w\fP or \fB-x\fP, if present). This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the \fB--include\fP or
+\fB--exclude\fP options.
+.TP
+\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
+Read patterns from the file, one per line, and match them against
+each line of input. What constitutes a newline when reading the file is the
+operating system's default. The \fB--newline\fP option has no effect on this
+option. Trailing white space is removed from each line, and blank lines are
+ignored. An empty file contains no patterns and therefore matches nothing. See
+also the comments about multiple patterns versus a single pattern with
+alternatives in the description of \fB-e\fP above.
+.sp
+If this option is given more than once, all the specified files are
+read. A data line is output if any of the patterns match it. A filename can
+be given as "-" to refer to the standard input. When \fB-f\fP is used, patterns
+specified on the command line using \fB-e\fP may also be present; they are
+tested before the file's patterns. However, no other pattern is taken from the
+command line; all arguments are treated as the names of paths to be searched.
+.TP
+\fB--file-list\fP=\fIfilename\fP
+Read a list of files and/or directories that are to be scanned from the given
+file, one per line. Trailing white space is removed from each line, and blank
+lines are ignored. These paths are processed before any that are listed on the
+command line. The filename can be given as "-" to refer to the standard input.
+If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are
+read first. This is useful only when the standard input is a terminal, from
+which further lines (the list of files) can be read after an end-of-file
+indication. If this option is given more than once, all the specified files are
+read.
+.TP
+\fB--file-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as an
+offset from the start of the file and a length, separated by a comma. In this
+mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with \fB--line-offsets\fP
+and \fB--only-matching\fP.
+.TP
+\fB-H\fP, \fB--with-filename\fP
+Force the inclusion of the filename at the start of output lines when searching
+a single file. By default, the filename is not shown in this case. For matching
+lines, the filename is followed by a colon; for context lines, a hyphen
+separator is used. If a line number is also being output, it follows the file
+name.
+.TP
+\fB-h\fP, \fB--no-filename\fP
+Suppress the output filenames when searching multiple files. By default,
+filenames are shown when multiple files are searched. For matching lines, the
+filename is followed by a colon; for context lines, a hyphen separator is used.
+If a line number is also being output, it follows the file name.
+.TP
+\fB--help\fP
+Output a help message, giving brief details of the command options and file
+type support, and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-I\fP
+Treat binary files as never matching. This is equivalent to
+\fB--binary-files\fP=\fIwithout-match\fP.
+.TP
+\fB-i\fP, \fB--ignore-case\fP
+Ignore upper/lower case distinctions during comparisons.
+.TP
+\fB--include\fP=\fIpattern\fP
+If any \fB--include\fP patterns are specified, the only files that are
+processed are those that match one of the patterns (and do not match an
+\fB--exclude\fP pattern). This option does not affect directories, but it
+applies to all files, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
+expression, and is matched against the final component of the file name, not
+the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not apply to
+this pattern. The option may be given any number of times. If a file name
+matches both an \fB--include\fP and an \fB--exclude\fP pattern, it is excluded.
+There is no short form for this option.
+.TP
+\fB--include-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--include\fP
+option. What constitutes a newline for this purpose is the operating system's
+default. The \fB--newline\fP option has no effect on this option. This option
+may be given any number of times; all the files are read.
+.TP
+\fB--include-dir\fP=\fIpattern\fP
+If any \fB--include-dir\fP patterns are specified, the only directories that
+are processed are those that match one of the patterns (and do not match an
+\fB--exclude-dir\fP pattern). This applies to all directories, whether listed
+on the command line, obtained from \fB--file-list\fP, or by scanning a parent
+directory. The pattern is a PCRE2 regular expression, and is matched against the
+final component of the directory name, not the entire path. The \fB-F\fP,
+\fB-w\fP, and \fB-x\fP options do not apply to this pattern. The option may be
+given any number of times. If a directory matches both \fB--include-dir\fP and
+\fB--exclude-dir\fP, it is excluded. There is no short form for this option.
+.TP
+\fB-L\fP, \fB--files-without-match\fP
+Instead of outputting lines from the files, just output the names of the files
+that do not contain any lines that would have been output. Each file name is
+output once, on a separate line.
+.TP
+\fB-l\fP, \fB--files-with-matches\fP
+Instead of outputting lines from the files, just output the names of the files
+containing lines that would have been output. Each file name is output
+once, on a separate line. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches.
+.TP
+\fB--label\fP=\fIname\fP
+This option supplies a name to be used for the standard input when file names
+are being output. If not supplied, "(standard input)" is used. There is no
+short form for this option.
+.TP
+\fB--line-buffered\fP
+When this option is given, input is read and processed line by line, and the
+output is flushed after each write. By default, input is read in large chunks,
+unless \fBpcre2grep\fP can determine that it is reading from a terminal (which
+is currently possible only in Unix-like environments). Output to terminal is
+normally automatically flushed by the operating system. This option can be
+useful when the input or output is attached to a pipe and you do not want
+\fBpcre2grep\fP to buffer up large amounts of data. However, its use will affect
+performance, and the \fB-M\fP (multiline) option ceases to work.
+.TP
+\fB--line-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as a
+line number, the offset from the start of the line, and a length. The line
+number is terminated by a colon (as usual; see the \fB-n\fP option), and the
+offset and length are separated by a comma. In this mode, no context is shown.
+That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
+more than one match in a line, each of them is shown separately. This option is
+mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
+.TP
+\fB--locale\fP=\fIlocale-name\fP
+This option specifies a locale to be used for pattern matching. It overrides
+the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
+locale is specified, the PCRE2 library's default (usually the "C" locale) is
+used. There is no short form for this option.
+.TP
+\fB--match-limit\fP=\fInumber\fP
+Processing some regular expression patterns can require a very large amount of
+memory, leading in some cases to a program crash if not enough is available.
+Other patterns may take a very long time to search for all possible matching
+strings. The \fBpcre2_match()\fP function that is called by \fBpcre2grep\fP to do
+the matching has two parameters that can limit the resources that it uses.
+.sp
+The \fB--match-limit\fP option provides a means of limiting resource usage
+when processing patterns that are not going to match, but which have a very
+large number of possibilities in their search trees. The classic example is a
+pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
+called \fBmatch()\fP which it calls repeatedly (sometimes recursively). The
+limit set by \fB--match-limit\fP is imposed on the number of times this
+function is called during a match, which has the effect of limiting the amount
+of backtracking that can take place.
+.sp
+The \fB--recursion-limit\fP option is similar to \fB--match-limit\fP, but
+instead of limiting the total number of times that \fBmatch()\fP is called, it
+limits the depth of recursive calls, which in turn limits the amount of memory
+that can be used. The recursion depth is a smaller number than the total number
+of calls, because not all calls to \fBmatch()\fP are recursive. This limit is
+of use only if it is set smaller than \fB--match-limit\fP.
+.sp
+There are no short forms for these options. The default settings are specified
+when the PCRE2 library is compiled, with the default default being 10 million.
+.TP
+\fB-M\fP, \fB--multiline\fP
+Allow patterns to match more than one line. When this option is given, patterns
+may usefully contain literal newline characters and internal occurrences of ^
+and $ characters. The output for a successful match may consist of more than
+one line, the last of which is the one in which the match ended. If the matched
+string ends with a newline sequence the output ends at the end of that line.
+.sp
+When this option is set, the PCRE2 library is called in "multiline" mode.
+There is a limit to the number of lines that can be matched, imposed by the way
+that \fBpcre2grep\fP buffers the input file as it scans it. However,
+\fBpcre2grep\fP ensures that at least 8K characters or the rest of the document
+(whichever is the shorter) are available for forward matching, and similarly
+the previous 8K characters (or all the previous characters, if fewer than 8K)
+are guaranteed to be available for lookbehind assertions. This option does not
+work when input is read line by line (see \fB--line-buffered\fP.)
+.TP
+\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
+The PCRE2 library supports five different conventions for indicating
+the ends of lines. They are the single-character sequences CR (carriage return)
+and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
+which recognizes any of the preceding three types, and an "any" convention, in
+which any Unicode line ending sequence is assumed to end a line. The Unicode
+sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
+(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
+PS (paragraph separator, U+2029).
+.sp
+When the PCRE2 library is built, a default line-ending sequence is specified.
+This is normally the standard sequence for the operating system. Unless
+otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
+The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
+makes it possible to use \fBpcre2grep\fP to scan files that have come from other
+environments without having to modify their line endings. If the data that is
+being scanned does not agree with the convention set by this option,
+\fBpcre2grep\fP may behave in strange ways. Note that this option does not
+apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options, which are expected to use the operating system's
+standard newline sequence.
+.TP
+\fB-n\fP, \fB--line-number\fP
+Precede each output line by its line number in the file, followed by a colon
+for matching lines or a hyphen for context lines. If the filename is also being
+output, it precedes the line number. This option is forced if
+\fB--line-offsets\fP is used.
+.TP
+\fB--no-jit\fP
+If the PCRE2 library is built with support for just-in-time compiling (which
+speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
+\fB-o\fP, \fB--only-matching\fP
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
+\fB-C\fP options are ignored. If there is more than one match in a line, each
+of them is shown separately. If \fB-o\fP is combined with \fB-v\fP (invert the
+sense of the match to find non-matching lines), no output is generated, but the
+return code is set appropriately. If the matched portion of the line is empty,
+nothing is output unless the file name or line number are being printed, in
+which case they are shown on an otherwise empty line. This option is mutually
+exclusive with \fB--file-offsets\fP and \fB--line-offsets\fP.
+.TP
+\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 32 capturing parentheses are supported, and -o0 is
+equivalent to \fB-o\fP without a number. Because these options can be given
+without an argument (see above), if an argument is present, it must be given in
+the same shell item, for example, -o3 or --only-matching=2. The comments given
+for the non-argument case above also apply to this case. If the specified
+capturing parentheses do not exist in the pattern, or were not set in the
+match, nothing is output unless the file name or line number are being printed.
+.sp
+If this option is given multiple times, multiple substrings are output, in the
+order the options are given. For example, -o3 -o1 -o3 causes the substrings
+matched by capturing parentheses 3 and 1 and then 3 again to be output. By
+default, there is no separator (but see the next option).
+.TP
+\fB--om-separator\fP=\fItext\fP
+Specify a separating string for multiple occurrences of \fB-o\fP. The default
+is an empty string. Separating strings are never coloured.
+.TP
+\fB-q\fP, \fB--quiet\fP
+Work quietly, that is, display nothing except error messages. The exit
+status indicates whether or not any matches were found.
+.TP
+\fB-r\fP, \fB--recursive\fP
+If any given path is a directory, recursively scan the files it contains,
+taking note of any \fB--include\fP and \fB--exclude\fP settings. By default, a
+directory is read as a normal file; in some operating systems this gives an
+immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
+option to "recurse".
+.TP
+\fB--recursion-limit\fP=\fInumber\fP
+See \fB--match-limit\fP above.
+.TP
+\fB-s\fP, \fB--no-messages\fP
+Suppress error messages about non-existent or unreadable files. Such files are
+quietly skipped. However, the return code is still 2, even if matches were
+found in other files.
+.TP
+\fB-u\fP, \fB--utf-8\fP
+Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
+with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
+\fB--include\fP options) and all subject lines that are scanned must be valid
+strings of UTF-8 characters.
+.TP
+\fB-V\fP, \fB--version\fP
+Write the version numbers of \fBpcre2grep\fP and the PCRE2 library to the
+standard output and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-v\fP, \fB--invert-match\fP
+Invert the sense of the match, so that lines which do \fInot\fP match any of
+the patterns are the ones that are found.
+.TP
+\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
+Force the patterns to match only whole words. This is equivalent to having \eb
+at the start and end of the pattern. This option applies only to the patterns
+that are matched against the contents of files; it does not apply to patterns
+specified by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
+Force the patterns to be anchored (each must start matching at the beginning of
+a line) and in addition, require them to match entire lines. This is equivalent
+to having ^ and $ characters at the start and end of each alternative branch in
+every pattern. This option applies only to the patterns that are matched
+against the contents of files; it does not apply to patterns specified by any
+of the \fB--include\fP or \fB--exclude\fP options.
+.
+.
+.SH "ENVIRONMENT VARIABLES"
+.rs
+.sp
+The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that
+order, for a locale. The first one that is set is used. This can be overridden
+by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
+(usually the "C" locale) is used.
+.
+.
+.SH "NEWLINES"
+.rs
+.sp
+The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
+different newline conventions from the default. Any parts of the input files
+that are written to the standard output are copied identically, with whatever
+newline sequences they have in the input. However, the setting of this option
+does not affect the interpretation of files specified by the \fB-f\fP,
+\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use
+the operating system's standard newline sequence, nor does it affect the way in
+which \fBpcre2grep\fP writes informational messages to the standard error and
+output streams. For these it uses the string "\en" to indicate newlines,
+relying on the C I/O library to convert this to an appropriate sequence.
+.
+.
+.SH "OPTIONS COMPATIBILITY"
+.rs
+.sp
+Many of the short and long forms of \fBpcre2grep\fP's options are the same
+as in the GNU \fBgrep\fP program. Any long option of the form
+\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
+(PCRE2 terminology). However, the \fB--file-list\fP, \fB--file-offsets\fP,
+\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP,
+\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
+\fB--recursion-limit\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
+\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
+capturing parentheses number.
+.P
+Although most of the common options work the same way, a few are different in
+\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
+for GNU \fBgrep\fP, but a regular expression for \fBpcre2grep\fP. If both the
+\fB-c\fP and \fB-l\fP options are given, GNU \fBgrep\fP lists only file names,
+without counts, but \fBpcre2grep\fP gives the counts.
+.
+.
+.SH "OPTIONS WITH DATA"
+.rs
+.sp
+There are four different ways in which an option with data can be specified.
+If a short form option is used, the data may follow immediately, or (with one
+exception) in the next command line item. For example:
+.sp
+  -f/some/file
+  -f /some/file
+.sp
+The exception is the \fB-o\fP option, which may appear with or without data.
+Because of this, if data is present, it must follow immediately in the same
+item, for example -o3.
+.P
+If a long form option is used, the data may appear in the same command line
+item, separated by an equals character, or (with two exceptions) it may appear
+in the next command line item. For example:
+.sp
+  --file=/some/file
+  --file /some/file
+.sp
+Note, however, that if you want to supply a file name beginning with ~ as data
+in a shell command, and have the shell expand ~ to a home directory, you must
+separate the file name from the option, because the shell does not treat ~
+specially unless it is at the start of an item.
+.P
+The exceptions to the above are the \fB--colour\fP (or \fB--color\fP) and
+\fB--only-matching\fP options, for which the data is optional. If one of these
+options does have data, it must be given in the first form, using an equals
+character. Otherwise \fBpcre2grep\fP will assume that it has no data.
+.
+.
+.SH "MATCHING ERRORS"
+.rs
+.sp
+It is possible to supply a regular expression that takes a very long time to
+fail to match certain lines. Such patterns normally involve nested indefinite
+repeats, for example: (a+)*\ed when matched against a line of a's with no final
+digit. The PCRE2 matching function has a resource limit that causes it to abort
+in these circumstances. If this happens, \fBpcre2grep\fP outputs an error
+message and the line that caused the problem to the standard error stream. If
+there are more than 20 such errors, \fBpcre2grep\fP gives up.
+.P
+The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the overall
+resource limit; there is a second option called \fB--recursion-limit\fP that
+sets a limit on the amount of memory (usually stack) that is used (see the
+discussion of these options above).
+.
+.
+.SH DIAGNOSTICS
+.rs
+.sp
+Exit status is 0 if any matches were found, 1 if no matches were found, and 2
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+\fB-s\fP option to suppress error messages about inaccessible files does not
+affect the return code.
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2test\fP(1).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
new file mode 100644
index 0000000..c4b177c
--- /dev/null
+++ b/doc/pcre2grep.txt
@@ -0,0 +1,741 @@
+PCRE2GREP(1)                General Commands Manual               PCRE2GREP(1)
+
+
+
+NAME
+       pcre2grep - a grep with Perl-compatible regular expressions.
+
+SYNOPSIS
+       pcre2grep [options] [long options] [pattern] [path1 path2 ...]
+
+
+DESCRIPTION
+
+       pcre2grep  searches  files  for  character patterns, in the same way as
+       other grep commands do,  but  it  uses  the  PCRE2  regular  expression
+       library  to  support  patterns  that  are  compatible  with the regular
+       expressions of Perl 5. See pcre2syntax(3) for a quick-reference summary
+       of  pattern  syntax,  or  pcre2pattern(3) for a full description of the
+       syntax and semantics of the regular expressions that PCRE2 supports.
+
+       Patterns, whether supplied on the command line or in a  separate  file,
+       are given without delimiters. For example:
+
+         pcre2grep Thursday /etc/motd
+
+       If you attempt to use delimiters (for example, by surrounding a pattern
+       with slashes, as is common in Perl scripts), they  are  interpreted  as
+       part  of  the pattern. Quotes can of course be used to delimit patterns
+       on the command line because they are  interpreted  by  the  shell,  and
+       indeed  quotes  are required if a pattern contains white space or shell
+       metacharacters.
+
+       The first argument that follows any option settings is treated  as  the
+       single  pattern  to be matched when neither -e nor -f is present.  Con-
+       versely, when one or both of these options are  used  to  specify  pat-
+       terns, all arguments are treated as path names. At least one of -e, -f,
+       or an argument pattern must be provided.
+
+       If no files are specified, pcre2grep  reads  the  standard  input.  The
+       standard  input can also be referenced by a name consisting of a single
+       hyphen.  For example:
+
+         pcre2grep some-pattern /file1 - /file3
+
+       By default, each line that matches a pattern is copied to the  standard
+       output,  and if there is more than one file, the file name is output at
+       the start of each line, followed by a colon. However, there are options
+       that  can  change  how  pcre2grep behaves. In particular, the -M option
+       makes it possible to search for patterns  that  span  line  boundaries.
+       What  defines  a  line  boundary  is  controlled  by the -N (--newline)
+       option.
+
+       The amount of memory used for buffering files that are being scanned is
+       controlled  by a parameter that can be set by the --buffer-size option.
+       The default value for this parameter is  specified  when  pcre2grep  is
+       built,  with  the  default  default  being 20K. A block of memory three
+       times this size is used (to allow for buffering  "before"  and  "after"
+       lines). An error occurs if a line overflows the buffer.
+
+       Patterns  can  be  no  longer than 8K or BUFSIZ bytes, whichever is the
+       greater.  BUFSIZ is defined in <stdio.h>. When there is more  than  one
+       pattern (specified by the use of -e and/or -f), each pattern is applied
+       to each line in the order in which they are defined,  except  that  all
+       the -e patterns are tried before the -f patterns.
+
+       By  default, as soon as one pattern matches a line, no further patterns
+       are considered. However, if --colour (or --color) is used to colour the
+       matching  substrings, or if --only-matching, --file-offsets, or --line-
+       offsets is used to output only  the  part  of  the  line  that  matched
+       (either shown literally, or as an offset), scanning resumes immediately
+       following the match, so that further matches on the same  line  can  be
+       found.  If  there  are  multiple  patterns,  they  are all tried on the
+       remainder of the line, but patterns that follow the  one  that  matched
+       are not tried on the earlier part of the line.
+
+       This  behaviour  means  that  the  order in which multiple patterns are
+       specified can affect the output when one of the above options is  used.
+       This  is no longer the same behaviour as GNU grep, which now manages to
+       display earlier matches for later patterns (as  long  as  there  is  no
+       overlap).
+
+       Patterns  that can match an empty string are accepted, but empty string
+       matches   are   never   recognized.   An   example   is   the   pattern
+       "(super)?(man)?",  in  which  all components are optional. This pattern
+       finds all occurrences of both "super" and  "man";  the  output  differs
+       from  matching  with  "super|man" when only the matching substrings are
+       being shown.
+
+       If the LC_ALL or LC_CTYPE environment variable is set,  pcre2grep  uses
+       the value to set a locale when calling the PCRE2 library.  The --locale
+       option can be used to override this.
+
+
+SUPPORT FOR COMPRESSED FILES
+
+       It is possible to compile pcre2grep so that it uses libz or  libbz2  to
+       read  files  whose names end in .gz or .bz2, respectively. You can find
+       out whether your binary has support for one or both of these file types
+       by running it with the --help option. If the appropriate support is not
+       present, files are treated as plain text. The standard input is  always
+       so treated.
+
+
+BINARY FILES
+
+       By  default,  a  file that contains a binary zero byte within the first
+       1024 bytes is identified as a binary file, and is processed  specially.
+       (GNU  grep  also  identifies  binary  files  in  this  manner.) See the
+       --binary-files option for a means of changing the way binary files  are
+       handled.
+
+
+OPTIONS
+
+       The  order  in  which some of the options appear can affect the output.
+       For example, both the -h and -l options affect  the  printing  of  file
+       names.  Whichever  comes later in the command line will be the one that
+       takes effect. Similarly, except where noted  below,  if  an  option  is
+       given  twice,  the  later setting is used. Numerical values for options
+       may be followed by K  or  M,  to  signify  multiplication  by  1024  or
+       1024*1024 respectively.
+
+       --        This terminates the list of options. It is useful if the next
+                 item on the command line starts with a hyphen but is  not  an
+                 option.  This allows for the processing of patterns and file-
+                 names that start with hyphens.
+
+       -A number, --after-context=number
+                 Output number lines of context after each matching  line.  If
+                 filenames and/or line numbers are being output, a hyphen sep-
+                 arator is used instead of a colon for the  context  lines.  A
+                 line  containing  "--" is output between each group of lines,
+                 unless they are in fact contiguous in  the  input  file.  The
+                 value  of number is expected to be relatively small. However,
+                 pcre2grep guarantees to have  up  to  8K  of  following  text
+                 available for context output.
+
+       -a, --text
+                 Treat  binary  files as text. This is equivalent to --binary-
+                 files=text.
+
+       -B number, --before-context=number
+                 Output number lines of context before each matching line.  If
+                 filenames and/or line numbers are being output, a hyphen sep-
+                 arator is used instead of a colon for the  context  lines.  A
+                 line  containing  "--" is output between each group of lines,
+                 unless they are in fact contiguous in  the  input  file.  The
+                 value  of number is expected to be relatively small. However,
+                 pcre2grep guarantees to have  up  to  8K  of  preceding  text
+                 available for context output.
+
+       --binary-files=word
+                 Specify  how binary files are to be processed. If the word is
+                 "binary" (the default),  pattern  matching  is  performed  on
+                 binary  files,  but  the  only  output is "Binary file <name>
+                 matches" when a match succeeds. If the word is "text",  which
+                 is  equivalent  to  the -a or --text option, binary files are
+                 processed in the same way as any other file.  In  this  case,
+                 when  a  match  succeeds,  the  output may be binary garbage,
+                 which can have nasty effects if sent to a  terminal.  If  the
+                 word  is  "without-match",  which  is  equivalent  to  the -I
+                 option, binary files are  not  processed  at  all;  they  are
+                 assumed not to be of interest.
+
+       --buffer-size=number
+                 Set  the  parameter that controls how much memory is used for
+                 buffering files that are being scanned.
+
+       -C number, --context=number
+                 Output number lines of context both  before  and  after  each
+                 matching  line.  This is equivalent to setting both -A and -B
+                 to the same value.
+
+       -c, --count
+                 Do not output individual lines from the files that are  being
+                 scanned; instead output the number of lines that would other-
+                 wise have been shown. If no lines are  selected,  the  number
+                 zero  is  output.  If  several  files  are  being  scanned, a
+                 count is output for each of them. However,  if  the  --files-
+                 with-matches  option  is  also  used,  only those files whose
+                 counts are greater than zero are listed. When -c is used, the
+                 -A, -B, and -C options are ignored.
+
+       --colour, --color
+                 If this option is given without any data, it is equivalent to
+                 "--colour=auto".  If data is required, it must  be  given  in
+                 the same shell item, separated by an equals sign.
+
+       --colour=value, --color=value
+                 This option specifies under what circumstances the parts of a
+                 line that matched a pattern should be coloured in the output.
+                 By  default,  the output is not coloured. The value (which is
+                 optional, see above) may be "never", "always", or "auto".  In
+                 the  last  case,  colouring happens only if the standard out-
+                 put is connected to a terminal. More resources are used  when
+                 colouring is enabled, because pcre2grep has to search for all
+                 possible matches in a line, not just one, in order to  colour
+                 them all.
+
+                 The colour that is used can be specified by setting the envi-
+                 ronment variable  PCRE2GREP_COLOUR  or  PCRE2GREP_COLOR.  The
+                 value  of  this  variable  should be a string of two numbers,
+                 separated by a semicolon. They are copied directly  into  the
+                 control  string  for  setting  colour on a terminal, so it is
+                 your responsibility to ensure that they make sense.  If  nei-
+                 ther  of  the  environment  variables  is set, the default is
+                 "1;31", which gives red.
+
+       -D action, --devices=action
+                 If an input path is  not  a  regular  file  or  a  directory,
+                 "action"  specifies  how  it is to be processed. Valid values
+                 are "read" (the default) or "skip" (silently skip the path).
+
+       -d action, --directories=action
+                 If an input path is a directory, "action" specifies how it is
+                 to  be  processed.   Valid  values are "read" (the default in
+                 non-Windows environments, for compatibility with  GNU  grep),
+                 "recurse"  (equivalent to the -r option), or "skip" (silently
+                 skip the path, the default in Windows environments).  In  the
+                 "read"  case,  directories  are read as if they were ordinary
+                 files. In some operating systems  the  effect  of  reading  a
+                 directory like this is an immediate end-of-file; in others it
+                 may provoke an error.
+
+       -e pattern, --regex=pattern, --regexp=pattern
+                 Specify a pattern to be matched. This option can be used mul-
+                 tiple times in order to specify several patterns. It can also
+                 be used as a way of specifying a single pattern  that  starts
+                 with  a hyphen. When -e is used, no argument pattern is taken
+                 from the command line; all  arguments  are  treated  as  file
+                 names.  There is no limit to the number of patterns. They are
+                 applied to each line in the order in which they  are  defined
+                 until one matches.
+
+                 If  -f is used with -e, the command line patterns are matched
+                 first, followed by the patterns from the file(s), independent
+                 of  the order in which these options are specified. Note that
+                 multiple use of -e is not the same as a single  pattern  with
+                 alternatives. For example, X|Y finds the first character in a
+                 line that is X or Y, whereas if the two  patterns  are  given
+                 separately, with X first, pcre2grep finds X if it is present,
+                 even if it follows Y in the line. It finds Y only if there is
+                 no  X  in  the line. This matters only if you are using -o or
+                 --colo(u)r to show the part(s) of the line that matched.
+
+       --exclude=pattern
+                 Files (but not directories) whose names match the pattern are
+                 skipped  without  being processed. This applies to all files,
+                 whether listed on the command  line,  obtained  from  --file-
+                 list, or by scanning a directory. The pattern is a PCRE2 reg-
+                 ular expression, and is matched against the  final  component
+                 of  the  file  name,  not the entire path. The -F, -w, and -x
+                 options do not apply to this pattern. The option may be given
+                 any number of times in order to specify multiple patterns. If
+                 a file name matches both an --include and an  --exclude  pat-
+                 tern, it is excluded. There is no short form for this option.
+
+       --exclude-from=filename
+                 Treat  each  non-empty  line  of  the file as the data for an
+                 --exclude option. What constitutes a newline when reading the
+                 file  is the operating system's default. The --newline option
+                 has no effect on this option. This option may be  given  more
+                 than once in order to specify a number of files to read.
+
+       --exclude-dir=pattern
+                 Directories whose names match the pattern are skipped without
+                 being processed, whatever  the  setting  of  the  --recursive
+                 option.  This  applies  to all directories, whether listed on
+                 the command line, obtained from --file-list, or by scanning a
+                 parent  directory. The pattern is a PCRE2 regular expression,
+                 and is matched against the final component of  the  directory
+                 name,  not the entire path. The -F, -w, and -x options do not
+                 apply to this pattern. The option may be given any number  of
+                 times  in order to specify more than one pattern. If a direc-
+                 tory matches both  --include-dir  and  --exclude-dir,  it  is
+                 excluded. There is no short form for this option.
+
+       -F, --fixed-strings
+                 Interpret  each  data-matching  pattern  as  a  list of fixed
+                 strings, separated by  newlines,  instead  of  as  a  regular
+                 expression.  What  constitutes  a newline for this purpose is
+                 controlled by the --newline option. The -w (match as a  word)
+                 and  -x (match whole line) options can be used with -F.  They
+                 apply to each of the fixed strings. A line is selected if any
+                 of the fixed strings are found in it (subject to -w or -x, if
+                 present). This option applies only to the patterns  that  are
+                 matched  against  the contents of files; it does not apply to
+                 patterns specified by  any  of  the  --include  or  --exclude
+                 options.
+
+       -f filename, --file=filename
+                 Read  patterns  from  the  file, one per line, and match them
+                 against each line of input. What constitutes a  newline  when
+                 reading  the  file  is  the  operating  system's default. The
+                 --newline option has no effect on this option. Trailing white
+                 space is removed from each line, and blank lines are ignored.
+                 An empty file contains  no  patterns  and  therefore  matches
+                 nothing. See also the comments about multiple patterns versus
+                 a single pattern with alternatives in the description  of  -e
+                 above.
+
+                 If  this  option  is  given more than once, all the specified
+                 files are read. A data line is output if any of the  patterns
+                 match  it.  A  filename  can  be given as "-" to refer to the
+                 standard input. When -f is used, patterns  specified  on  the
+                 command  line  using  -e may also be present; they are tested
+                 before the file's patterns.  However,  no  other  pattern  is
+                 taken from the command line; all arguments are treated as the
+                 names of paths to be searched.
+
+       --file-list=filename
+                 Read a list of  files  and/or  directories  that  are  to  be
+                 scanned  from  the  given  file, one per line. Trailing white
+                 space is removed from each line, and blank lines are ignored.
+                 These  paths  are processed before any that are listed on the
+                 command line. The filename can be given as "-"  to  refer  to
+                 the standard input.  If --file and --file-list are both spec-
+                 ified as "-", patterns are read first. This  is  useful  only
+                 when  the  standard  input  is a terminal, from which further
+                 lines (the list of files) can be read  after  an  end-of-file
+                 indication.  If  this option is given more than once, all the
+                 specified files are read.
+
+       --file-offsets
+                 Instead of showing lines or parts of lines that  match,  show
+                 each  match  as  an  offset  from the start of the file and a
+                 length, separated by a comma. In this  mode,  no  context  is
+                 shown.  That  is,  the -A, -B, and -C options are ignored. If
+                 there is more than one match in a line, each of them is shown
+                 separately.  This  option  is mutually exclusive with --line-
+                 offsets and --only-matching.
+
+       -H, --with-filename
+                 Force the inclusion of the filename at the  start  of  output
+                 lines  when searching a single file. By default, the filename
+                 is not shown in this case. For matching lines,  the  filename
+                 is followed by a colon; for context lines, a hyphen separator
+                 is used. If a line number is also being  output,  it  follows
+                 the file name.
+
+       -h, --no-filename
+                 Suppress  the  output  of  filenames  when  searching multiple
+                 files. By default, filenames are shown when multiple files are
+                 searched.  For  matching lines, the filename is followed by a
+                 colon; for context lines, a hyphen separator is used.   If  a
+                 line number is also being output, it follows the file name.
+
+       --help    Output  a  help  message, giving brief details of the command
+                 options and file type support, and then exit.  Anything  else
+                 on the command line is ignored.
+
+       -I        Treat  binary  files as never matching. This is equivalent to
+                 --binary-files=without-match.
+
+       -i, --ignore-case
+                 Ignore upper/lower case distinctions during comparisons.
+
+       --include=pattern
+                 If any --include patterns are specified, the only files  that
+                 are  processed  are those that match one of the patterns (and
+                 do not match an --exclude  pattern).  This  option  does  not
+                 affect  directories,  but  it  applies  to all files, whether
+                 listed on the command line, obtained from --file-list, or  by
+                 scanning  a directory. The pattern is a PCRE2 regular expres-
+                 sion, and is matched against the final component of the  file
+                 name,  not the entire path. The -F, -w, and -x options do not
+                 apply to this pattern. The option may be given any number  of
+                 times.  If  a  file  name  matches  both  an --include and an
+                 --exclude pattern, it is excluded.  There is  no  short  form
+                 for this option.
+
+       --include-from=filename
+                 Treat  each  non-empty  line  of  the file as the data for an
+                 --include option. What constitutes a newline for this purpose
+                 is  the  operating system's default. The --newline option has
+                 no effect on this option. This option may be given any number
+                 of times; all the files are read.
+
+       --include-dir=pattern
+                 If  any --include-dir patterns are specified, the only direc-
+                 tories that are processed are those that  match  one  of  the
+                 patterns  (and  do  not match an --exclude-dir pattern). This
+                 applies to all directories, whether  listed  on  the  command
+                 line,  obtained  from  --file-list,  or  by scanning a parent
+                 directory. The pattern is a PCRE2 regular expression, and  is
+                 matched  against  the  final component of the directory name,
+                 not the entire path. The -F, -w, and -x options do not  apply
+                 to this pattern. The option may be given any number of times.
+                 If a directory matches both --include-dir and  --exclude-dir,
+                 it is excluded. There is no short form for this option.
+
+       -L, --files-without-match
+                 Instead  of  outputting lines from the files, just output the
+                 names of the files that do not contain any lines  that  would
+                 have  been  output. Each file name is output once, on a sepa-
+                 rate line.
+
+       -l, --files-with-matches
+                 Instead of outputting lines from the files, just  output  the
+                 names of the files containing lines that would have been out-
+                 put. Each file name is  output  once,  on  a  separate  line.
+                 Searching  normally stops as soon as a matching line is found
+                 in a file. However, if the -c (count) option  is  also  used,
+                 matching  continues in order to obtain the correct count, and
+                 those files that have at least one  match  are  listed  along
+                 with their counts. Using this option with -c is a way of sup-
+                 pressing the listing of files with no matches.
+
+       --label=name
+                 This option supplies a name to be used for the standard input
+                 when file names are being output. If not supplied, "(standard
+                 input)" is used. There is no short form for this option.
+
+       --line-buffered
+                 When this option is given, input is read and  processed  line
+                 by  line,  and  the  output  is  flushed after each write. By
+                 default, input is read in large chunks, unless pcre2grep  can
+                 determine  that  it is reading from a terminal (which is cur-
+                 rently possible only in Unix-like  environments).  Output  to
+                 a terminal is normally automatically flushed by the operating
+                 system. This option can be useful when the input or output is
+                 attached to a pipe and you do not want pcre2grep to buffer up
+                 large amounts of data. However, its use will  affect  perfor-
+                 mance, and the -M (multiline) option ceases to work.
+
+       --line-offsets
+                 Instead  of  showing lines or parts of lines that match, show
+                 each match as a line number, the offset from the start of the
+                 line,  and a length. The line number is terminated by a colon
+                 (as usual; see the -n option), and the offset and length  are
+                 separated  by  a  comma.  In  this mode, no context is shown.
+                 That is, the -A, -B, and -C options are ignored. If there  is
+                 more  than  one  match in a line, each of them is shown sepa-
+                 rately. This option is mutually exclusive with --file-offsets
+                 and --only-matching.
+
+       --locale=locale-name
+                 This  option specifies a locale to be used for pattern match-
+                 ing. It overrides the value in the LC_ALL or  LC_CTYPE  envi-
+                 ronment  variables.  If  no  locale  is  specified, the PCRE2
+                 library's default (usually the "C" locale) is used. There  is
+                 no short form for this option.
+
+       --match-limit=number
+                 Processing  some  regular  expression  patterns can require a
+                 very large amount of memory, leading in some cases to a  pro-
+                 gram  crash  if  not enough is available.  Other patterns may
+                 take a very long time to search  for  all  possible  matching
+                 strings.   The  pcre2_match()  function  that  is  called  by
+                 pcre2grep to do the matching  has  two  parameters  that  can
+                 limit the resources that it uses.
+
+                 The   --match-limit  option  provides  a  means  of  limiting
+                 resource usage when processing patterns that are not going to
+                 match, but which have a very large number of possibilities in
+                 their search trees. The classic example  is  a  pattern  that
+                 uses nested unlimited repeats. Internally, PCRE2 uses a func-
+                 tion called match()  which  it  calls  repeatedly  (sometimes
+                 recursively).  The  limit  set by --match-limit is imposed on
+                 the number of times this function is called during  a  match,
+                 which  has  the effect of limiting the amount of backtracking
+                 that can take place.
+
+                 The --recursion-limit option is similar to --match-limit, but
+                 instead of limiting the total number of times that match() is
+                 called, it limits the depth of recursive calls, which in turn
+                 limits  the  amount of memory that can be used. The recursion
+                 depth is a smaller number than the  total  number  of  calls,
+                 because not all calls to match() are recursive. This limit is
+                 of use only if it is set smaller than --match-limit.
+
+                 There are no short forms for these options. The default  set-
+                 tings are specified when the PCRE2 library is compiled; if no
+                 value is given at that time, 10 million is used.
+
+       -M, --multiline
+                 Allow patterns to match more than one line. When this  option
+                 is given, patterns may usefully contain literal newline char-
+                 acters and internal occurrences of ^ and  $  characters.  The
+                 output  for  a  successful match may consist of more than one
+                 line, the last of which is the one in which the match  ended.
+                 If the matched string ends with  a  newline  sequence,  the
+                 output ends at the end of that line.
+
+                 When this option is set, the PCRE2 library is called in "mul-
+                 tiline"  mode.   There is a limit to the number of lines that
+                 can be matched, imposed by the way that pcre2grep buffers the
+                 input file as it scans it. However, pcre2grep ensures that at
+                 least 8K characters or the rest of the document (whichever is
+                 the  shorter)  are  available for forward matching, and simi-
+                 larly the previous 8K characters (or all the previous charac-
+                 ters,  if  fewer  than 8K) are guaranteed to be available for
+                 lookbehind assertions. This option does not work  when  input
+                 is read line by line (see --line-buffered).
+
+       -N newline-type, --newline=newline-type
+                 The  PCRE2  library  supports  five different conventions for
+                 indicating the ends of lines. They are  the  single-character
+                 sequences  CR  (carriage  return) and LF (linefeed), the two-
+                 character sequence CRLF, an "anycrlf" convention, which  rec-
+                 ognizes  any  of the preceding three types, and an "any" con-
+                 vention, in which any Unicode line ending sequence is assumed
+                 to  end a line. The Unicode sequences are the three just men-
+                 tioned, plus  VT  (vertical  tab,  U+000B),  FF  (form  feed,
+                 U+000C),   NEL  (next  line,  U+0085),  LS  (line  separator,
+                 U+2028), and PS (paragraph separator, U+2029).
+
+                 When the  PCRE2  library  is  built,  a  default  line-ending
+                 sequence   is  specified.   This  is  normally  the  standard
+                 sequence for the operating system. Unless otherwise specified
+                 by  this  option,  pcre2grep uses the library's default.  The
+                 possible values for this option are CR, LF, CRLF, ANYCRLF, or
+                 ANY.  This  makes  it possible to use pcre2grep to scan files
+                 that have come from other environments without having to mod-
+                 ify  their  line  endings.  If the data that is being scanned
+                 does not agree  with  the  convention  set  by  this  option,
+                 pcre2grep  may  behave in strange ways. Note that this option
+                 does not apply to files specified by the -f,  --exclude-from,
+                 or  --include-from  options,  which  are  expected to use the
+                 operating system's standard newline sequence.
+
+       -n, --line-number
+                 Precede each output line by its line number in the file, fol-
+                 lowed  by  a colon for matching lines or a hyphen for context
+                 lines. If the filename is also being output, it precedes  the
+                 line number. This option is forced if --line-offsets is used.
+
+       --no-jit  If  the  PCRE2 library is built with support for just-in-time
+                 compiling (which speeds up matching), pcre2grep automatically
+                 makes use of this, unless it was explicitly disabled at build
+                 time. This option can be used to disable the use  of  JIT  at
+                 run  time. It is provided for testing and working round prob-
+                 lems.  It should never be needed in normal use.
+
+       -o, --only-matching
+                 Show only the part of the line that matched a pattern instead
+                 of  the  whole  line. In this mode, no context is shown. That
+                 is, the -A, -B, and -C options are ignored. If there is  more
+                 than  one  match in a line, each of them is shown separately.
+                 If -o is combined with -v (invert the sense of the  match  to
+                 find  non-matching  lines),  no  output is generated, but the
+                 return code is set appropriately. If the matched  portion  of
+                 the  line is empty, nothing is output unless the file name or
+                 line number are being printed, in which case they  are  shown
+                 on an otherwise empty line. This option is mutually exclusive
+                 with --file-offsets and --line-offsets.
+
+       -onumber, --only-matching=number
+                 Show only the part of the line  that  matched  the  capturing
+                 parentheses of the given number. Up to 32 capturing parenthe-
+                 ses are supported, and -o0 is equivalent to -o without a num-
+                 ber.  Because  these options can be given without an argument
+                 (see above), if an argument is present, it must be  given  in
+                 the  same  shell item, for example, -o3 or --only-matching=2.
+                 The comments given for the non-argument case above also apply
+                 to  this  case. If the specified capturing parentheses do not
+                 exist in the pattern, or were not set in the  match,  nothing
+                 is  output  unless  the  file  name  or line number are being
+                 printed.
+
+                 If this option is given multiple times,  multiple  substrings
+                 are  output, in the order the options are given. For example,
+                 -o3 -o1 -o3 causes the substrings matched by capturing paren-
+                 theses  3  and  1  and then 3 again to be output. By default,
+                 there is no separator (but see the next option).
+
+       --om-separator=text
+                 Specify a separating string for multiple occurrences  of  -o.
+                 The  default is an empty string. Separating strings are never
+                 coloured.
+
+       -q, --quiet
+                 Work quietly, that is, display nothing except error messages.
+                 The  exit  status  indicates  whether or not any matches were
+                 found.
+
+       -r, --recursive
+                 If any given path is a directory, recursively scan the  files
+                 it  contains, taking note of any --include and --exclude set-
+                 tings. By default, a directory is read as a normal  file;  in
+                 some  operating  systems this gives an immediate end-of-file.
+                 This option is a shorthand  for  setting  the  -d  option  to
+                 "recurse".
+
+       --recursion-limit=number
+                 See --match-limit above.
+
+       -s, --no-messages
+                 Suppress  error  messages  about  non-existent  or unreadable
+                 files. Such files are quietly skipped.  However,  the  return
+                 code is still 2, even if matches were found in other files.
+
+       -u, --utf-8
+                 Operate in UTF-8 mode. This option is available only if PCRE2
+                 has been compiled with UTF-8 support. All patterns (including
+                 those  for  any --exclude and --include options) and all sub-
+                 ject lines that are scanned must be valid  strings  of  UTF-8
+                 characters.
+
+       -V, --version
+                 Write  the version numbers of pcre2grep and the PCRE2 library
+                 to the standard output and then exit. Anything  else  on  the
+                 command line is ignored.
+
+       -v, --invert-match
+                 Invert  the  sense  of  the match, so that lines which do not
+                 match any of the patterns are the ones that are found.
+
+       -w, --word-regex, --word-regexp
+                 Force the patterns to match only whole words. This is equiva-
+                 lent  to  having \b at the start and end of the pattern. This
+                 option applies only to the patterns that are matched  against
+                 the  contents  of files; it does not apply to patterns speci-
+                 fied by any of the --include or --exclude options.
+
+       -x, --line-regex, --line-regexp
+                 Force the patterns to be anchored (each must  start  matching
+                 at  the beginning of a line) and in addition, require them to
+                 match entire lines. This is equivalent  to  having  ^  and  $
+                 characters at the start and end of each alternative branch in
+                 every pattern. This option applies only to the patterns  that
+                 are  matched against the contents of files; it does not apply
+                 to patterns specified by any of the  --include  or  --exclude
+                 options.
+
+
+ENVIRONMENT VARIABLES
+
+       The  environment  variables  LC_ALL  and LC_CTYPE are examined, in that
+       order, for a locale. The first one that is set is  used.  This  can  be
+       overridden  by  the  --locale  option.  If  no locale is set, the PCRE2
+       library's default (usually the "C" locale) is used.
+
+
+NEWLINES
+
+       The -N (--newline) option allows pcre2grep to scan files with different
+       newline conventions from the default. Any parts of the input files that
+       are written to the standard output are copied identically,  with  what-
+       ever  newline sequences they have in the input. However, the setting of
+       this option does not affect the interpretation of  files  specified  by
+       the -f, --exclude-from, or --include-from options, which are assumed to
+       use the operating system's  standard  newline  sequence,  nor  does  it
+       affect  the way in which pcre2grep writes informational messages to the
+       standard error and output streams. For these it uses the string "\n" to
+       indicate  newlines,  relying on the C I/O library to convert this to an
+       appropriate sequence.
+
+
+OPTIONS COMPATIBILITY
+
+       Many of the short and long forms of pcre2grep's options are the same as
+       in  the GNU grep program. Any long option of the form --xxx-regexp (GNU
+       terminology) is also available as --xxx-regex (PCRE2 terminology). How-
+       ever,  the  --file-list, --file-offsets, --include-dir, --line-offsets,
+       --locale, --match-limit, -M, --multiline, -N,  --newline,  --om-separa-
+       tor,  --recursion-limit,  -u,  and  --utf-8  options  are  specific  to
+       pcre2grep, as is the use of the --only-matching option with a capturing
+       parentheses number.
+
+       Although  most  of the common options work the same way, a few are dif-
+       ferent in pcre2grep. For example, the --include option's argument is  a
+       glob  for GNU grep, but a regular expression for pcre2grep. If both the
+       -c and -l options are given, GNU grep lists only  file  names,  without
+       counts, but pcre2grep gives the counts.
+
+
+OPTIONS WITH DATA
+
+       There are four different ways in which an option with data can be spec-
+       ified.  If a short form option is used, the  data  may  follow  immedi-
+       ately, or (with one exception) in the next command line item. For exam-
+       ple:
+
+         -f/some/file
+         -f /some/file
+
+       The exception is the -o option, which may appear with or without  data.
+       Because  of this, if data is present, it must follow immediately in the
+       same item, for example -o3.
+
+       If a long form option is used, the data may appear in the same  command
+       line  item,  separated by an equals character, or (with two exceptions)
+       it may appear in the next command line item. For example:
+
+         --file=/some/file
+         --file /some/file
+
+       Note, however, that if you want to supply a file name beginning with  ~
+       as  data  in  a  shell  command,  and have the shell expand ~ to a home
+       directory, you must separate the file name from the option, because the
+       shell does not treat ~ specially unless it is at the start of an item.
+
+       The  exceptions  to the above are the --colour (or --color) and --only-
+       matching options, for which the data  is  optional.  If  one  of  these
+       options  does  have  data, it must be given in the first form, using an
+       equals character. Otherwise pcre2grep will assume that it has no data.
+
+
+MATCHING ERRORS
+
+       It is possible to supply a regular expression that takes  a  very  long
+       time  to  fail  to  match certain lines. Such patterns normally involve
+       nested indefinite repeats, for example: (a+)*\d when matched against  a
+       line  of  a's  with  no  final digit. The PCRE2 matching function has a
+       resource limit that causes it to abort in these circumstances. If  this
+       happens,  pcre2grep  outputs  an error message and the line that caused
+       the problem to the standard error stream. If there  are  more  than  20
+       such errors, pcre2grep gives up.
+
+       The  --match-limit  option  of pcre2grep can be used to set the overall
+       resource limit; there is a second option called --recursion-limit  that
+       sets  a limit on the amount of memory (usually stack) that is used (see
+       the discussion of these options above).
+
+
+DIAGNOSTICS
+
+       Exit status is 0 if any matches were found, 1 if no matches were found,
+       and  2  for syntax errors, overlong lines, non-existent or inaccessible
+       files (even if matches were found in other files) or too many  matching
+       errors. Using the -s option to suppress error messages about inaccessi-
+       ble files does not affect the return code.
+
+
+SEE ALSO
+
+       pcre2pattern(3), pcre2syntax(3), pcre2test(1).
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 28 September 2014
+       Copyright (c) 1997-2014 University of Cambridge.
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index f85f25e..71022ac 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -2997,7 +2997,7 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
     }
   if (do_colour)
     {
-    char *cs = getenv("pcre2grep_COLOUR");
+    char *cs = getenv("PCRE2GREP_COLOUR");
     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
     if (cs != NULL) colour_string = cs;
     }
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 19fadd1..1aa40b4 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -5094,9 +5094,9 @@ printf("  32-bit support\n");
 
 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
 if (rc != 0)
-  printf("  UTF support (Unicode version %s)\n", uversion);
+  printf("  UTF and UCP support (Unicode version %s)\n", uversion);
 else
-  printf("  No UTF support\n");
+  printf("  No UTF or UCP support\n");
 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
 if (rc != 0)
   {