Skip to content

Commit

Permalink
Improve possessive repeat support
Browse files: browse the repository at this point in the history
  • Loading branch information
Zoltan Herczeg committed Dec 18, 2024
1 parent 413bd8a commit f2cd441
Showing 1 changed file with 105 additions and 57 deletions.
162 changes: 105 additions & 57 deletions src/pcre2_jit_compile.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -1307,7 +1307,7 @@ while (cc < ccend)

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
case OP_CRPOSRANGE:
if (GET2(cc, 1) < GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw))
if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw))
locals_size = 3 * SSIZE_OF(sw);
cc += 1 + 2 * IMM2_SIZE;
break;
Expand Down Expand Up @@ -10447,8 +10447,10 @@ else
*exact = 1;
*opcode -= OP_PLUS - OP_STAR;
}
return cc;
}
else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)

if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
{
*opcode -= OP_CRPOSSTAR - OP_POSSTAR;
*end = cc + class_len;
Expand All @@ -10458,41 +10460,36 @@ else
*exact = 1;
*opcode = OP_POSSTAR;
}
return cc;
}
else

SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
*max = GET2(cc, (class_len + IMM2_SIZE));
*exact = GET2(cc, class_len);
*end = cc + class_len + 2 * IMM2_SIZE;

if (*max == 0)
{
SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
*max = GET2(cc, (class_len + IMM2_SIZE));
*exact = GET2(cc, class_len);
SLJIT_ASSERT(*exact > 1);
if (*opcode == OP_CRPOSRANGE)
*opcode = OP_POSUPTO;
else
*opcode -= OP_CRRANGE - OP_STAR;
return cc;
}

if (*max == 0)
{
if (*opcode == OP_CRPOSRANGE)
*opcode = OP_POSSTAR;
else
*opcode -= OP_CRRANGE - OP_STAR;
}
*max -= *exact;
if (*max == 0)
*opcode = OP_EXACT;
else
{
SLJIT_ASSERT(*exact > 0 || *max > 1);
if (*opcode == OP_CRPOSRANGE)
*opcode = OP_POSUPTO;
else if (*max == 1)
*opcode -= OP_CRRANGE - OP_QUERY;
else
{
*max -= *exact;
if (*max == 0)
*opcode = OP_EXACT;
else if (*max == 1)
{
if (*opcode == OP_CRPOSRANGE)
*opcode = OP_POSQUERY;
else
*opcode -= OP_CRRANGE - OP_QUERY;
}
else
{
if (*opcode == OP_CRPOSRANGE)
*opcode = OP_POSUPTO;
else
*opcode -= OP_CRRANGE - OP_UPTO;
}
}
*end = cc + class_len + 2 * IMM2_SIZE;
*opcode -= OP_CRRANGE - OP_UPTO;
}
return cc;
}
Expand Down Expand Up @@ -10593,36 +10590,39 @@ else
}

/* Handle fixed part first. */
if (exact > 1)
if (opcode != OP_POSUPTO)
{
SLJIT_ASSERT(early_fail_ptr == 0);
if (exact > 1)
{
SLJIT_ASSERT(early_fail_ptr == 0);

if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
&& !common->utf
if (common->mode == PCRE2_JIT_COMPLETE
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
&& !common->utf
#endif
&& type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
&& type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
{
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
else
{
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR)
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR)
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);

if (early_fail_type == type_fail_range)
{
Expand Down Expand Up @@ -11037,6 +11037,8 @@ switch(opcode)
case OP_POSUPTO:
SLJIT_ASSERT(early_fail_ptr == 0);
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));

max += exact;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
Expand All @@ -11047,6 +11049,25 @@ switch(opcode)
tmp_offset = LOCAL3;
}

if (exact == max)
{
/* Extuni has a separate OP_TYPEEXACT opcode for fixed iterations. */
SLJIT_ASSERT(tmp_base == TMP3);
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, 0);

label = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, STR_PTR, 0);
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
OP2(SLJIT_ADD, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_JUMP, label);

set_jumps(no_match, LABEL());

add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS, tmp_base, tmp_offset, SLJIT_IMM, exact));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
break;
}

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, STR_PTR, 0);
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

Expand All @@ -11059,13 +11080,18 @@ switch(opcode)
detect_partial_match_to(common, label);

set_jumps(no_match, LABEL());

if (exact > 0)
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, tmp_base, tmp_offset, SLJIT_IMM, max - exact));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
break;
}
#endif

if (type == OP_ALLANY)
{
/* Allany has a separate OP_TYPEEXACT opcode for fixed iterations. */
SLJIT_ASSERT(exact == 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));

if (common->mode == PCRE2_JIT_COMPLETE)
Expand All @@ -11082,6 +11108,25 @@ switch(opcode)
break;
}

if (exact == max)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, 0);

detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
OP2(SLJIT_ADD, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());

add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS, tmp_base, tmp_offset, SLJIT_IMM, exact));
break;
}

OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);

detect_partial_match(common, &no_match);
Expand All @@ -11095,6 +11140,9 @@ switch(opcode)
set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());

if (exact > 0)
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, tmp_base, tmp_offset, SLJIT_IMM, max - exact));
break;

case OP_POSQUERY:
Expand Down

0 comments on commit f2cd441

Please sign in to comment.