Skip to content

Commit

Permalink
Improve coverage of escapes in character classes (#591)
Browse files (browse the repository at this point in the history)
* Improve coverage of escapes in character classes

* Small coverage improvement in pcre2_match.c
  • Loading branch information
NWilson authored Dec 6, 2024
1 parent e8a5cd7 commit 94a0118
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 29 deletions.
23 changes: 20 additions & 3 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,7 @@ else
hptr >= ptrend || /* Hit end of input */
*hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
{
if (isclass) break; /* In a class, just treat as '\u' literal */
escape = ESC_ub; /* Special return */
ptr++; /* Skip { */
break; /* Hex escape not recognized */
Expand Down Expand Up @@ -4284,6 +4285,11 @@ while (ptr < ptrend)
char_is_literal = FALSE;
goto CLASS_LITERAL;

case ESC_k:
c = CHAR_k; /* \k is not special in a class, just like \g */
char_is_literal = FALSE;
goto CLASS_LITERAL;

case ESC_Q:
inescq = TRUE; /* Enter literal mode */
goto CLASS_CONTINUE;
Expand All @@ -4295,7 +4301,7 @@ while (ptr < ptrend)
case ESC_R:
case ESC_X:
errorcode = ERR7;
ptr--;
ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549
goto FAILED;

case ESC_N: /* Not permitted by Perl either */
Expand Down Expand Up @@ -4342,9 +4348,20 @@ while (ptr < ptrend)
#endif
break; /* End \P and \p */

default: /* All others are not allowed in a class */
/* All others are not allowed in a class */

default:
PCRE2_DEBUG_UNREACHABLE();
/* Fall through */

case ESC_A:
case ESC_Z:
case ESC_z:
case ESC_G:
case ESC_K:
case ESC_C:
errorcode = ERR7;
ptr--;
ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549
goto FAILED;
}

Expand Down
60 changes: 34 additions & 26 deletions src/pcre2_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,38 +541,46 @@ For hard partial matching, we immediately return a partial match. Otherwise,
carrying on means that a complete match on the current subject will be sought.
A partial match is returned only if no complete match can be found. */

/* Earlier form of CHECK_PARTIAL; the same macro is defined again below wrapped
in do { } while (0), so this looks like the removed side of the diff. It runs
SCHECK_PARTIAL() when Feptr has reached or passed mb->end_subject. The
expansion is a bare "if" statement, so an "else" written directly after a use
of the macro would bind to the macro's own "if". */
#define CHECK_PARTIAL()\
if (Feptr >= mb->end_subject) \
{ \
SCHECK_PARTIAL(); \
}

/* Earlier form of SCHECK_PARTIAL; the same macro is defined again below wrapped
in do { } while (0), so this looks like the removed side of the diff. When
mb->partial is non-zero and either Feptr is beyond mb->start_used_ptr or
mb->allowemptypartial is set, it sets mb->hitend to TRUE and, if mb->partial is
greater than 1, returns PCRE2_ERROR_PARTIAL from the enclosing function
(presumably the "hard partial" case described in the comment above). The
expansion is a bare "if" statement, so a following "else" would bind to it. */
#define SCHECK_PARTIAL()\
if (mb->partial != 0 && \
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
}
/* CHECK_PARTIAL() runs SCHECK_PARTIAL() only when Feptr has reached or passed
mb->end_subject. It reads Feptr and mb from the scope in which it is used. The
do { } while (0) wrapper makes the expansion a single statement that needs a
terminating semicolon, so it is safe as the body of an if/else. SCHECK_PARTIAL
is defined after this macro, which is fine because it is only expanded at the
point of use. */
#define CHECK_PARTIAL() \
do { \
if (Feptr >= mb->end_subject) \
{ \
SCHECK_PARTIAL(); \
} \
} \
while (0)

/* SCHECK_PARTIAL() does the partial-match check without testing for the end of
the subject. When mb->partial is non-zero and either Feptr is beyond
mb->start_used_ptr or mb->allowemptypartial is set, it sets mb->hitend to TRUE.
If mb->partial is greater than 1 it then returns PCRE2_ERROR_PARTIAL from the
enclosing function (presumably the "hard partial" case described above), so the
macro can only be used in a function whose return type accepts that value. The
do { } while (0) wrapper makes the expansion a single statement. */
#define SCHECK_PARTIAL() \
do { \
if (mb->partial != 0 && \
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
} \
} \
while (0)


/* These macros are used to implement backtracking. They simulate a recursive
call to the match() function by means of a local vector of frames which
remember the backtracking points. */

/* Earlier form of RMATCH; the same macro is defined again below wrapped in
do { } while (0), so this looks like the removed side of the diff. It stores ra
in start_ecode and rb in Freturn_id, jumps to MATCH_RECURSE, and declares the
label L_<rb> right after the goto as the place to resume. The expansion is a
brace block, so "RMATCH(a,b);" is a block followed by an empty statement, which
does not compose with a following "else". */
#define RMATCH(ra,rb)\
{\
start_ecode = ra;\
Freturn_id = rb;\
goto MATCH_RECURSE;\
L_##rb:;\
}

/* Earlier form of RRETURN; the same macro is defined again below wrapped in
do { } while (0), so this looks like the removed side of the diff. It stores ra
in rrc and jumps to RETURN_SWITCH. The expansion is a brace block, so
"RRETURN(x);" is a block followed by an empty statement, which does not compose
with a following "else". */
#define RRETURN(ra)\
{\
rrc = ra;\
goto RETURN_SWITCH;\
}
/* RMATCH(ra,rb) stores ra in start_ecode and rb in Freturn_id, then jumps to
MATCH_RECURSE. It also declares the label L_<rb> (built by token pasting)
immediately after the goto; control that later jumps to that label reaches the
end of the do { } while (0) body and continues with the statement following the
macro use. Because the macro declares a label, each rb value can be used only
once within a function. The names start_ecode, Freturn_id and MATCH_RECURSE are
taken from the scope in which the macro is used. */
#define RMATCH(ra,rb) \
do { \
start_ecode = ra; \
Freturn_id = rb; \
goto MATCH_RECURSE; \
L_##rb:; \
} \
while (0)

/* RRETURN(ra) stores ra in rrc and jumps to RETURN_SWITCH; nothing after the
macro use on the same path is executed. The do { } while (0) wrapper makes the
expansion a single statement that needs a terminating semicolon. The names rrc
and RETURN_SWITCH are taken from the scope in which the macro is used. */
#define RRETURN(ra) \
do { \
rrc = ra; \
goto RETURN_SWITCH; \
} \
while (0)



Expand Down
31 changes: 31 additions & 0 deletions testdata/testinput2
Original file line number Diff line number Diff line change
Expand Up @@ -7255,4 +7255,35 @@ a)"xI

/[A-\\]/B

/[\A]/

/[\Z]/

/[\z]/

/[\G]/

/[\K]/

/[\g<1>]/
<
g
\= Expect no match
\\

/[\k<1>]/
<
k
\= Expect no match
\\

/[\u{ 1z}]/alt_bsux,extra_alt_bsux
u
{
}
\x20
1
\= Expect no match
\\

# End of testinput2
2 changes: 2 additions & 0 deletions testdata/testinput21
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@
/(?<=ab\Cde)X/
abZdeX

/[\C]/

# End of testinput21
2 changes: 2 additions & 0 deletions testdata/testinput23
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@

/a\Cb/

/a[\C]b/

# End of testinput23
48 changes: 48 additions & 0 deletions testdata/testoutput2
Original file line number Diff line number Diff line change
Expand Up @@ -20933,6 +20933,54 @@ Failed: error 106 at offset 3: missing terminating ] for character class
End
------------------------------------------------------------------

/[\A]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\Z]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\z]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\G]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\K]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\g<1>]/
<
0: <
g
0: g
\= Expect no match
\\
No match

/[\k<1>]/
<
0: <
k
0: k
\= Expect no match
\\
No match

/[\u{ 1z}]/alt_bsux,extra_alt_bsux
u
0: u
{
0: {
}
0: }
\x20
0:
1
0: 1
\= Expect no match
\\
No match

# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Expand Down
3 changes: 3 additions & 0 deletions testdata/testoutput21
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,7 @@ Subject length lower bound = 5
abZdeX
0: X

/[\C]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

# End of testinput21
3 changes: 3 additions & 0 deletions testdata/testoutput23
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@
/a\Cb/
Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library

/a[\C]b/
Failed: error 107 at offset 3: escape sequence is invalid in character class

# End of testinput23

0 comments on commit 94a0118

Please sign in to comment.