Tidy comments about UTF case-independence.

2017-04-20 16:51:36 +00:00 · 2017-04-20 16:51:36 +00:00 · b3a6fd38b8
parent b59f00fa14
commit b3a6fd38b8
1 changed file with 13 additions and 20 deletions
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -929,7 +929,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
    /* ===================================================================== */
    /* Match a single character, caselessly. If we are at the end of the
-    subject, give up immediately. */
+    subject, give up immediately. We get here only when the pattern character 
+    has at most one other case. Characters with more than two cases are coded 
+    as OP_PROP with the pseudo-property PT_CLIST. */
    case OP_CHARI:
    if (Feptr >= mb->end_subject)
@@ -945,10 +947,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
      Fecode++;
      GETCHARLEN(fc, Fecode, Flength);
-      /* If the pattern character's value is < 128, we have only one byte, and
+      /* If the pattern character's value is < 128, we know that its other case
-      we know that its other case must also be one byte long, so we can use the
+      (if any) is also < 128 (and therefore only one code unit long in all 
-      fast lookup table. We know that there is at least one byte left in the
+      code-unit widths), so we can use the fast lookup table. We checked above
-      subject. */
+      that there is at least one character left in the subject. */
      if (fc < 128)
        {
@@ -958,32 +960,23 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        Feptr++;
        }
-      /* Otherwise we must pick up the subject character. Note that we cannot
+      /* Otherwise we must pick up the subject character and use Unicode 
-      use the value of "Flength" to check for sufficient bytes left, because the
+      property support to test its other case. Note that we cannot use the
-      other case of the character may have more or fewer bytes.  */
+      value of "Flength" to check for sufficient bytes left, because the other
+      case of the character may have more or fewer code units. */
      else
        {
        uint32_t dc;
        GETCHARINC(dc, Feptr);
        Fecode += Flength;
-
+        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
-        /* If we have Unicode property support, we can use it to test the other
-        case of the character, if there is one. */
-        if (fc != dc)
-          {
-#ifdef SUPPORT_UNICODE
-          if (dc != UCD_OTHERCASE(fc))
-#endif
-            RRETURN(MATCH_NOMATCH);
-          }
        }
      }
    else
 #endif   /* SUPPORT_UNICODE */
-    /* Not UTF mode */
+    /* Not UTF mode; use the table for characters < 256. */
      {
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);