From 4ef0c51d2bbe1c572840cbb14c3080b1192e83d2 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Tue, 30 Nov 2021 16:34:39 +0000
Subject: [PATCH] Interpret NULL pointer, zero length as an empty string for
 subjects and replacements.

---
 ChangeLog              |   5 ++
 doc/html/pcre2api.html |  36 ++++++----
 doc/html/pcre2jit.html |  26 +++----
 doc/pcre2.txt          | 152 ++++++++++++++++++++++-------------------
 doc/pcre2api.3         |  37 ++++++----
 doc/pcre2jit.3         |  28 ++++----
 src/pcre2_dfa_match.c  |   4 ++
 src/pcre2_error.c      |   2 +-
 src/pcre2_match.c      |   4 ++
 src/pcre2_substitute.c |  10 ++-
 testdata/testinput17   |   3 +
 testdata/testinput18   |   5 ++
 testdata/testinput2    |  21 ++++++
 testdata/testoutput17  |   4 ++
 testdata/testoutput18  |   7 ++
 testdata/testoutput2   |  28 ++++++++
 16 files changed, 241 insertions(+), 131 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d558da3..506be5f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -34,6 +34,11 @@ substituting.
 
 12. Add check for NULL replacement to pcre2_substitute().
 
+13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and
+pcre2_substitute(), and the replacement argument of the latter, if the pointer
+is NULL and the length is zero, treat it as an empty string. Apparently a
+number of applications treat NULL/0 in this way.
+
 
 Version 10.39 29-October-2021
 -----------------------------
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index fab6e4a..dd34982 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -2640,7 +2640,9 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
 <i>startoffset</i>. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if <i>subject</i> is NULL and 
+<i>length</i> is zero, the subject is assumed to be an empty string. If 
+<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL.
 </P>
 <P>
 If <i>startoffset</i> is greater than the length of the subject,
@@ -3394,12 +3396,17 @@ same number causes an error at compile time.
 <P>
 This function optionally calls <b>pcre2_match()</b> and then makes a copy of the
 subject string in <i>outputbuffer</i>, replacing parts that were matched with
-the <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the <i>replacement</i> string, whose length is supplied in <i>rlength</i>, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if <i>replacement</i> is NULL and <i>rlength</i> is zero, the
+replacement is assumed to be an empty string. If <i>rlength</i> is non-zero, an
+error occurs if <i>replacement</i> is NULL.
+</P>
+<P>
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 </P>
 <P>
 If successful, <b>pcre2_substitute()</b> returns the number of substitutions
@@ -3812,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches,
 <P>
 The function <b>pcre2_dfa_match()</b> is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-<b>pcre2_dfa_match()</b> does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that <b>pcre2_dfa_match()</b> does
+not support, see the
 <a href="pcre2matching.html"><b>pcre2matching</b></a>
 documentation.
 </P>
@@ -4010,7 +4018,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 30 November 2021
 <br>
 Copyright &copy; 1997-2021 University of Cambridge.
 <br>
diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html
index e73a229..d89fa23 100644
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@@ -269,11 +269,11 @@ starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 </P>
 <P>
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
@@ -382,8 +382,8 @@ out this complicated API.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <P>
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@@ -442,10 +442,10 @@ that was not compiled.
 <P>
 When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 </P>
 <P>
 Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
@@ -466,9 +466,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC14" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 May 2019
+Last updated: 30 November 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 7e96413..9b49a76 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -2579,7 +2579,9 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
        and  offset  are  in  code units, not characters.  That is, they are in
        bytes for the 8-bit library, 16-bit code units for the 16-bit  library,
        and  32-bit  code units for the 32-bit library, whether or not UTF pro-
-       cessing is enabled.
+       cessing is enabled. As a special case, if subject is NULL and length is
+       zero,  the  subject is assumed to be an empty string. If length is non-
+       zero, an error occurs if subject is NULL.
 
        If startoffset is greater than the length of the subject, pcre2_match()
        returns  PCRE2_ERROR_BADOFFSET.  When  the starting offset is zero, the
@@ -3280,8 +3282,12 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
 
        This  function  optionally calls pcre2_match() and then makes a copy of
        the subject string in outputbuffer, replacing parts that  were  matched
-       with  the replacement string, whose length is supplied in rlength. This
-       can be given as PCRE2_ZERO_TERMINATED  for  a  zero-terminated  string.
+       with the replacement string, whose length is supplied in rlength, which
+       can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.  As
+       a  special  case,  if  replacement is NULL and rlength is zero, the re-
+       placement is assumed to be an empty string. If rlength is non-zero,  an
+       error occurs if replacement is NULL.
+
        There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re-
        turn just the replacement string(s). The default action is  to  perform
        just  one  replacement  if  the pattern matches, but there is an option
@@ -3666,23 +3672,24 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
        The  function  pcre2_dfa_match()  is  called  to match a subject string
        against a compiled pattern, using a matching algorithm that  scans  the
        subject string just once (not counting lookaround assertions), and does
-       not backtrack.  This has different characteristics to the normal  algo-
-       rithm,  and  is not compatible with Perl. Some of the features of PCRE2
-       patterns are not supported.  Nevertheless, there are  times  when  this
-       kind  of  matching  can be useful. For a discussion of the two matching
-       algorithms, and a list of features that pcre2_dfa_match() does not sup-
-       port, see the pcre2matching documentation.
+       not backtrack (except when processing lookaround assertions). This  has
+       different  characteristics to the normal algorithm, and is not compati-
+       ble with Perl. Some of the features of  PCRE2  patterns  are  not  sup-
+       ported. Nevertheless, there are times when this kind of matching can be
+       useful. For a discussion of the two matching algorithms, and a list  of
+       features that pcre2_dfa_match() does not support, see the pcre2matching
+       documentation.
 
-       The  arguments  for  the pcre2_dfa_match() function are the same as for
+       The arguments for the pcre2_dfa_match() function are the  same  as  for
        pcre2_match(), plus two extras. The ovector within the match data block
        is used in a different way, and this is described below. The other com-
-       mon arguments are used in the same way as for pcre2_match(),  so  their
+       mon  arguments  are used in the same way as for pcre2_match(), so their
        description is not repeated here.
 
-       The  two  additional  arguments provide workspace for the function. The
-       workspace vector should contain at least 20 elements. It  is  used  for
+       The two additional arguments provide workspace for  the  function.  The
+       workspace  vector  should  contain at least 20 elements. It is used for
        keeping  track  of  multiple  paths  through  the  pattern  tree.  More
-       workspace is needed for patterns and subjects where there are a lot  of
+       workspace  is needed for patterns and subjects where there are a lot of
        potential matches.
 
        Here is an example of a simple call to pcre2_dfa_match():
@@ -3702,45 +3709,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
 
    Option bits for pcre2_dfa_match()
 
-       The  unused  bits of the options argument for pcre2_dfa_match() must be
-       zero.  The  only   bits   that   may   be   set   are   PCRE2_ANCHORED,
-       PCRE2_COPY_MATCHED_SUBJECT,  PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO-
+       The unused bits of the options argument for pcre2_dfa_match()  must  be
+       zero.   The   only   bits   that   may   be   set  are  PCRE2_ANCHORED,
+       PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL,  PCRE2_NO-
        TEOL,   PCRE2_NOTEMPTY,   PCRE2_NOTEMPTY_ATSTART,   PCRE2_NO_UTF_CHECK,
-       PCRE2_PARTIAL_HARD,    PCRE2_PARTIAL_SOFT,    PCRE2_DFA_SHORTEST,   and
-       PCRE2_DFA_RESTART. All but the last four of these are exactly the  same
+       PCRE2_PARTIAL_HARD,   PCRE2_PARTIAL_SOFT,    PCRE2_DFA_SHORTEST,    and
+       PCRE2_DFA_RESTART.  All but the last four of these are exactly the same
        as for pcre2_match(), so their description is not repeated here.
 
          PCRE2_PARTIAL_HARD
          PCRE2_PARTIAL_SOFT
 
-       These  have  the  same general effect as they do for pcre2_match(), but
-       the details are slightly different. When PCRE2_PARTIAL_HARD is set  for
-       pcre2_dfa_match(),  it  returns  PCRE2_ERROR_PARTIAL  if the end of the
+       These have the same general effect as they do  for  pcre2_match(),  but
+       the  details are slightly different. When PCRE2_PARTIAL_HARD is set for
+       pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if  the  end  of  the
        subject is reached and there is still at least one matching possibility
        that requires additional characters. This happens even if some complete
-       matches have already been found. When PCRE2_PARTIAL_SOFT  is  set,  the
-       return  code  PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
-       if the end of the subject is  reached,  there  have  been  no  complete
+       matches  have  already  been found. When PCRE2_PARTIAL_SOFT is set, the
+       return code PCRE2_ERROR_NOMATCH is converted  into  PCRE2_ERROR_PARTIAL
+       if  the  end  of  the  subject  is reached, there have been no complete
        matches, but there is still at least one matching possibility. The por-
-       tion of the string that was inspected when the  longest  partial  match
+       tion  of  the  string that was inspected when the longest partial match
        was found is set as the first matching string in both cases. There is a
-       more detailed discussion of partial and  multi-segment  matching,  with
+       more  detailed  discussion  of partial and multi-segment matching, with
        examples, in the pcre2partial documentation.
 
          PCRE2_DFA_SHORTEST
 
-       Setting  the PCRE2_DFA_SHORTEST option causes the matching algorithm to
+       Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm  to
        stop as soon as it has found one match. Because of the way the alterna-
-       tive  algorithm  works, this is necessarily the shortest possible match
+       tive algorithm works, this is necessarily the shortest  possible  match
        at the first possible matching point in the subject string.
 
          PCRE2_DFA_RESTART
 
-       When pcre2_dfa_match() returns a partial match, it is possible to  call
+       When  pcre2_dfa_match() returns a partial match, it is possible to call
        it again, with additional subject characters, and have it continue with
        the same match. The PCRE2_DFA_RESTART option requests this action; when
-       it  is  set,  the workspace and wscount options must reference the same
-       vector as before because data about the match so far is  left  in  them
+       it is set, the workspace and wscount options must  reference  the  same
+       vector  as  before  because data about the match so far is left in them
        after a partial match. There is more discussion of this facility in the
        pcre2partial documentation.
 
@@ -3748,8 +3755,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
 
        When pcre2_dfa_match() succeeds, it may have matched more than one sub-
        string in the subject. Note, however, that all the matches from one run
-       of the function start at the same point in  the  subject.  The  shorter
-       matches  are all initial substrings of the longer matches. For example,
+       of  the  function  start  at the same point in the subject. The shorter
+       matches are all initial substrings of the longer matches. For  example,
        if the pattern
 
          <.*>
@@ -3764,80 +3771,80 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
          <something> <something else>
          <something>
 
-       On success, the yield of the function is a number  greater  than  zero,
-       which  is  the  number  of  matched substrings. The offsets of the sub-
-       strings are returned in the ovector, and can be extracted by number  in
-       the  same way as for pcre2_match(), but the numbers bear no relation to
-       any capture groups that may exist in the pattern, because DFA  matching
+       On  success,  the  yield of the function is a number greater than zero,
+       which is the number of matched substrings.  The  offsets  of  the  sub-
+       strings  are returned in the ovector, and can be extracted by number in
+       the same way as for pcre2_match(), but the numbers bear no relation  to
+       any  capture groups that may exist in the pattern, because DFA matching
        does not support capturing.
 
-       Calls  to the convenience functions that extract substrings by name re-
+       Calls to the convenience functions that extract substrings by name  re-
        turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af-
-       ter  a  DFA match. The convenience functions that extract substrings by
+       ter a DFA match. The convenience functions that extract  substrings  by
        number never return PCRE2_ERROR_NOSUBSTRING.
 
-       The matched strings are stored in  the  ovector  in  reverse  order  of
-       length;  that  is,  the longest matching string is first. If there were
-       too many matches to fit into the ovector, the yield of the function  is
+       The  matched  strings  are  stored  in  the ovector in reverse order of
+       length; that is, the longest matching string is first.  If  there  were
+       too  many matches to fit into the ovector, the yield of the function is
        zero, and the vector is filled with the longest matches.
 
-       NOTE:  PCRE2's  "auto-possessification" optimization usually applies to
-       character repeats at the end of a pattern (as well as internally).  For
-       example,  the pattern "a\d+" is compiled as if it were "a\d++". For DFA
-       matching, this means that only one possible match is found. If you  re-
+       NOTE: PCRE2's "auto-possessification" optimization usually  applies  to
+       character  repeats at the end of a pattern (as well as internally). For
+       example, the pattern "a\d+" is compiled as if it were "a\d++". For  DFA
+       matching,  this means that only one possible match is found. If you re-
        ally do want multiple matches in such cases, either use an ungreedy re-
-       peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when  com-
+       peat  such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com-
        piling.
 
    Error returns from pcre2_dfa_match()
 
        The pcre2_dfa_match() function returns a negative number when it fails.
-       Many of the errors are the same  as  for  pcre2_match(),  as  described
+       Many  of  the  errors  are  the same as for pcre2_match(), as described
        above.  There are in addition the following errors that are specific to
        pcre2_dfa_match():
 
          PCRE2_ERROR_DFA_UITEM
 
-       This return is given if pcre2_dfa_match() encounters  an  item  in  the
-       pattern  that it does not support, for instance, the use of \C in a UTF
+       This  return  is  given  if pcre2_dfa_match() encounters an item in the
+       pattern that it does not support, for instance, the use of \C in a  UTF
        mode or a backreference.
 
          PCRE2_ERROR_DFA_UCOND
 
-       This return is given if pcre2_dfa_match() encounters a  condition  item
+       This  return  is given if pcre2_dfa_match() encounters a condition item
        that uses a backreference for the condition, or a test for recursion in
        a specific capture group. These are not supported.
 
          PCRE2_ERROR_DFA_UINVALID_UTF
 
-       This return is given if pcre2_dfa_match() is called for a pattern  that
-       was  compiled  with  PCRE2_MATCH_INVALID_UTF. This is not supported for
+       This  return is given if pcre2_dfa_match() is called for a pattern that
+       was compiled with PCRE2_MATCH_INVALID_UTF. This is  not  supported  for
        DFA matching.
 
          PCRE2_ERROR_DFA_WSSIZE
 
-       This return is given if pcre2_dfa_match() runs  out  of  space  in  the
+       This  return  is  given  if  pcre2_dfa_match() runs out of space in the
        workspace vector.
 
          PCRE2_ERROR_DFA_RECURSE
 
        When a recursion or subroutine call is processed, the matching function
-       calls itself recursively, using private  memory  for  the  ovector  and
-       workspace.   This  error  is given if the internal ovector is not large
-       enough. This should be extremely rare, as a  vector  of  size  1000  is
+       calls  itself  recursively,  using  private  memory for the ovector and
+       workspace.  This error is given if the internal ovector  is  not  large
+       enough.  This  should  be  extremely  rare, as a vector of size 1000 is
        used.
 
          PCRE2_ERROR_DFA_BADRESTART
 
-       When  pcre2_dfa_match()  is  called  with the PCRE2_DFA_RESTART option,
-       some plausibility checks are made on the  contents  of  the  workspace,
-       which  should  contain data about the previous partial match. If any of
+       When pcre2_dfa_match() is called  with  the  PCRE2_DFA_RESTART  option,
+       some  plausibility  checks  are  made on the contents of the workspace,
+       which should contain data about the previous partial match. If  any  of
        these checks fail, this error is given.
 
 
 SEE ALSO
 
-       pcre2build(3),   pcre2callout(3),    pcre2demo(3),    pcre2matching(3),
+       pcre2build(3),    pcre2callout(3),    pcre2demo(3),   pcre2matching(3),
        pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3).
 
 
@@ -3850,7 +3857,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 30 August 2021
+       Last updated: 30 November 2021
        Copyright (c) 1997-2021 University of Cambridge.
 ------------------------------------------------------------------------------
  
@@ -5436,7 +5443,7 @@ FREEING JIT SPECULATIVE MEMORY
        void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
 
        The JIT executable allocator does not free all memory when it is possi-
-       ble.  It expects new allocations, and keeps some free memory around  to
+       ble. It expects new allocations, and keeps some free memory  around  to
        improve  allocation  speed. However, in low memory conditions, it might
        be better to free all possible memory. You can cause this to happen  by
        calling  pcre2_jit_free_unused_memory(). Its argument is a general con-
@@ -5494,12 +5501,13 @@ JIT FAST PATH API
 
        When  you call pcre2_match(), as well as testing for invalid options, a
        number of other sanity checks are performed on the arguments. For exam-
-       ple, if the subject pointer is NULL, an immediate error is given. Also,
-       unless PCRE2_NO_UTF_CHECK is set, a UTF subject string  is  tested  for
-       validity.  In the interests of speed, these checks do not happen on the
-       JIT fast path, and if invalid data is passed, the result is undefined.
+       ple,  if the subject pointer is NULL but the length is non-zero, an im-
+       mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set,  a  UTF
+       subject string is tested for validity. In the interests of speed, these
+       checks do not happen on the JIT fast  path,  and  if  invalid  data  is
+       passed, the result is undefined.
 
-       Bypassing the sanity checks and the  pcre2_match()  wrapping  can  give
+       Bypassing  the  sanity  checks  and the pcre2_match() wrapping can give
        speedups of more than 10%.
 
 
@@ -5517,8 +5525,8 @@ AUTHOR
 
 REVISION
 
-       Last updated: 23 May 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 30 November 2021
+       Copyright (c) 1997-2021 University of Cambridge.
 ------------------------------------------------------------------------------
  
  
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index fe84fa4..30964db 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2API 3 "30 November 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -2624,7 +2624,9 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
 \fIstartoffset\fP. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and 
+\fIlength\fP is zero, the subject is assumed to be an empty string. If 
+\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL.
 .P
 If \fIstartoffset\fP is greater than the length of the subject,
 \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is
@@ -3413,12 +3415,16 @@ same number causes an error at compile time.
 .P
 This function optionally calls \fBpcre2_match()\fP and then makes a copy of the
 subject string in \fIoutputbuffer\fP, replacing parts that were matched with
-the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the \fIreplacement\fP string, whose length is supplied in \fIrlength\fP, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the
+replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an
+error occurs if \fIreplacement\fP is NULL.
+.P
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 .P
 If successful, \fBpcre2_substitute()\fP returns the number of substitutions
 that were carried out. This may be zero if no match was found, and is never
@@ -3813,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches,
 .P
 The function \fBpcre2_dfa_match()\fP is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-\fBpcre2_dfa_match()\fP does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does
+not support, see the
 .\" HREF
 \fBpcre2matching\fP
 .\"
@@ -4018,6 +4025,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
+Last updated: 30 November 2021
 Copyright (c) 1997-2021 University of Cambridge.
 .fi
diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3
index 9b77550..f0b3b15 100644
--- a/doc/pcre2jit.3
+++ b/doc/pcre2jit.3
@@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2JIT 3 "30 November 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@@ -251,11 +251,11 @@ non-sequential matches in one thread is to use callouts: if a callout function
 starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 .P
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 .P
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
 to a match context that is used by any number of patterns, as long as they are
@@ -355,8 +355,8 @@ out this complicated API.
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .fi
 .P
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@@ -416,10 +416,10 @@ that was not compiled.
 .P
 When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 .P
 Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give
 speedups of more than 10%.
@@ -445,6 +445,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 30 November 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index a97e071..004252f 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -3285,6 +3285,10 @@ rws->next = NULL;
 rws->size = RWS_BASE_SIZE;
 rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
 
+/* Treat a NULL subject pointer with a length of zero as an empty string. */
+
+if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
+
 /* Plausibility checks */
 
 if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index 3dee63d..d3bb466 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -253,7 +253,7 @@ static const unsigned char match_error_texts[] =
   "unknown substring\0"
   /* 50 */
   "non-unique substring name\0"
-  "NULL argument passed\0"
+  "NULL argument passed with non-zero length\0"
   "nested recursion at the same subject position\0"
   "matching depth limit exceeded\0"
   "requested value is not available\0"
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ea8ca5d..7cfa44c 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6170,6 +6170,10 @@ PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
     PCRE2_KEEP_UNINITIALIZED;
 mb->stack_frames = (heapframe *)stack_frames_vector;
 
+/* Treat a NULL subject pointer with a length of zero as an empty string. */
+
+if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
+
 /* Plausibility checks */
 
 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 3d55d88..bddd6b0 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -260,9 +260,15 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
   return PCRE2_ERROR_BADOPTION;
 
-/* Validate length and find the end of the replacement. */
+/* Validate length and find the end of the replacement. A NULL replacement is
+an empty string if rlength is zero; with any other rlength it is an error. */
 
-if (replacement == NULL) return PCRE2_ERROR_NULL;
+if (replacement == NULL) 
+  {
+  if (rlength != 0) return PCRE2_ERROR_NULL;
+  replacement = (PCRE2_SPTR)""; 
+  } 
+   
 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
 repend = replacement + rlength;
 
diff --git a/testdata/testinput17 b/testdata/testinput17
index 65bbbb9..caf4c91 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -304,4 +304,7 @@
 /[aCz]/mg,firstline,newline=lf
     match\nmatch
 
+//jitfast
+    \=null_subject
+
 # End of testinput17
diff --git a/testdata/testinput18 b/testdata/testinput18
index a02521f..c1f4c22 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -135,4 +135,9 @@
     123ace
     123ace\=posix_startend=2:6
 
+//posix
+\= Expect errors
+    \=null_subject
+    abc\=null_subject
+
 # End of testdata/testinput18
diff --git a/testdata/testinput2 b/testdata/testinput2
index 4acb429..849bc2e 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5902,4 +5902,25 @@ a)"xI
     
 # --------- 
 
+# Tests for zero-length NULL to be treated as an empty string.
+
+//
+    \=null_subject
+\= Expect error     
+    abc\=null_subject
+
+//replace=[20]
+    abc\=null_replacement
+    \=null_subject
+    \=null_replacement
+
+/X*/g,replace=xy
+\= Expect error
+    >X<\=null_replacement
+
+/X+/replace=[20]
+    >XX<\=null_replacement
+
+# --------- 
+
 # End of testinput2
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index b66cfa3..b5b2cc8 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -550,4 +550,8 @@ Failed: error -47: match limit exceeded
     match\nmatch
  0: a (JIT)
 
+//jitfast
+    \=null_subject
+ 0:  (JIT)
+
 # End of testinput17
diff --git a/testdata/testoutput18 b/testdata/testoutput18
index 3e81737..55cd0cc 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@@ -215,4 +215,11 @@ Failed: POSIX code 16: bad argument at offset 0
  3: <unset>
  4: c
 
+//posix
+\= Expect errors
+    \=null_subject
+No match: POSIX code 16: bad argument
+    abc\=null_subject
+No match: POSIX code 16: bad argument
+
 # End of testdata/testinput18
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 25d608d..ce10f2b 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17674,6 +17674,34 @@ Failed: error 199 at offset 14: \K is not allowed in lookarounds (but see PCRE2_
     
 # --------- 
 
+# Tests for zero-length NULL to be treated as an empty string.
+
+//
+    \=null_subject
+ 0: 
+\= Expect error     
+    abc\=null_subject
+Failed: error -51: NULL argument passed with non-zero length
+
+//replace=[20]
+    abc\=null_replacement
+ 1: abc
+    \=null_subject
+ 1: 
+    \=null_replacement
+ 1: 
+
+/X*/g,replace=xy
+\= Expect error
+    >X<\=null_replacement
+Failed: error -51: NULL argument passed with non-zero length
+
+/X+/replace=[20]
+    >XX<\=null_replacement
+ 1: ><
+
+# --------- 
+
 # End of testinput2
 Error -70: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data