Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework memory management #619

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 48 additions & 30 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -11217,6 +11217,7 @@ DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;
size_t op_len;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

Expand Down Expand Up @@ -11366,7 +11367,8 @@ while (cc < ccend)
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
case OP_XCLASS:
case OP_ECLASS:
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
op_len = GET(cc, 1);
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
Expand All @@ -11375,22 +11377,19 @@ while (cc < ccend)

case OP_REF:
case OP_REFI:
{
int op_len = PRIV(OP_lengths)[*cc];
op_len = PRIV(OP_lengths)[*cc];
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
{
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
cc += op_len;
}
}
break;

case OP_DNREF:
case OP_DNREFI:
{
int op_len = PRIV(OP_lengths)[*cc];
op_len = PRIV(OP_lengths)[*cc];
if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
Expand All @@ -11399,7 +11398,6 @@ while (cc < ccend)
compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
cc += op_len;
}
}
break;

case OP_RECURSE:
Expand Down Expand Up @@ -12998,8 +12996,7 @@ int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
sljit_uw executable_size, private_data_length, total_length;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
Expand Down Expand Up @@ -13107,9 +13104,25 @@ ccend = bracketend(common->start);

/* Calculate the local space size on the stack. */
common->ovector_start = LOCAL0;
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
/* Allocate space for temporary data structures. */
private_data_length = ccend - common->start;
/* The chance of overflow is very low, but might happen on 32 bit. */
if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))
return PCRE2_ERROR_NOMEMORY;

private_data_length *= sizeof(sljit_s32);
/* Align to 32 bit. */
total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1);
if (~(sljit_uw)0 - private_data_length < total_length)
return PCRE2_ERROR_NOMEMORY;

total_length += private_data_length;
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);
if (!common->private_data_ptrs)
return PCRE2_ERROR_NOMEMORY;

memset(common->private_data_ptrs, 0, private_data_length);
common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
Expand All @@ -13123,7 +13136,7 @@ common->ovector_start += sizeof(sljit_sw);
#endif
if (!check_opcode_types(common, common->start, ccend))
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
return PCRE2_ERROR_JIT_UNSUPPORTED;
}

Expand All @@ -13135,6 +13148,7 @@ if (mode == PCRE2_JIT_COMPLETE &&
common->req_char_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}

if (mode != PCRE2_JIT_COMPLETE)
{
common->start_used_ptr = common->ovector_start;
Expand All @@ -13145,19 +13159,23 @@ if (mode != PCRE2_JIT_COMPLETE)
common->ovector_start += sizeof(sljit_sw);
}
}

if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
{
common->match_end_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif

if (common->control_head_ptr != 0)
{
common->control_head_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}

if (common->has_set_som)
{
/* Saving the real start pointer is necessary. */
Expand All @@ -13178,16 +13196,6 @@ if (common->capture_last_ptr != 0)

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);

if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
Expand All @@ -13202,22 +13210,28 @@ SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
if (private_data_size > 65536)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed on one of your branches, you have modified the if (private_data_size > 65536) check.

On master, it returns PCRE2_ERROR_NOMEMORY but at one time you changed it to PCRE2_ERROR_JIT_UNSUPPORTED.

It seems odd to report "not enough memory" when you reach that private_data_size limit, since it's not an issue with the machine's RAM, but it appears to be an implementation limitation.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember. I have change it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It changes the expected file:

-JIT compilation was not successful (no more memory)
+JIT compilation was not successful (feature is not supported by the JIT compiler)

Both seem not entirely correct.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the pattern doesn't fit inside the JIT's limits, should it fall back to the interpreter? That would potentially be the main reason to change the return code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That happens regardless. Introducing a new error code would be the best.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it falls back regardless, then I guess it doesn't matter. Just change it to "PCRE2_ERROR_JIT_UNSUPPORTED" if it's simply a size limitation in the JIT engine.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. Doing these things could be low hanging fruits for somebody.

{
SLJIT_FREE(common->private_data_ptrs, allocator_data);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
return PCRE2_ERROR_NOMEMORY;
return PCRE2_ERROR_JIT_UNSUPPORTED;
}

if (common->has_then)
{
common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
total_length = ccend - common->start;
common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);
if (!common->then_offsets)
{
SLJIT_FREE(common->private_data_ptrs, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
memset(common->then_offsets, 0, total_length);
set_then_offsets(common, common->start, NULL);
}

compiler = sljit_create_compiler(allocator_data);
if (!compiler)
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
if (common->has_then)
SLJIT_FREE(common->then_offsets, allocator_data);
return PCRE2_ERROR_NOMEMORY;
Comment on lines 13232 to 13235
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With all these repeated FREE statements, it's common to have a block at the end of the function with the cleanup code, which you can jump to with goto. Saves duplicating the cleanup. The cleanup block can have multiple jump targets, with fallthrough, so it cleans up stuff in reverse order to what was allocated. Or it can just check each pointer for NULL and clean up all the ones that have been alloced.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JIT does not use goto at the moment. But it is reasonable here. I expect compilers to do CSE. But you cannot trust in all compilers. Not sure what is the best. Shall we introduce goto?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind; if you don't want to use goto, then duplicating the FREE calls is OK. It's your code - it's only a maintenance concern.

}
common->compiler = compiler;
Expand Down Expand Up @@ -13309,8 +13323,9 @@ compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
if (common->has_then)
SLJIT_FREE(common->then_offsets, allocator_data);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
Expand Down Expand Up @@ -13365,8 +13380,9 @@ compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
if (common->has_then)
SLJIT_FREE(common->then_offsets, allocator_data);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
Expand Down Expand Up @@ -13478,8 +13494,9 @@ if (common->currententry != NULL)
SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);

sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
if (common->has_then)
SLJIT_FREE(common->then_offsets, allocator_data);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
Expand Down Expand Up @@ -13631,8 +13648,9 @@ if (common->getucdtype != NULL)
}
#endif /* SUPPORT_UNICODE */

SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
if (common->has_then)
SLJIT_FREE(common->then_offsets, allocator_data);

executable_func = sljit_generate_code(compiler, 0, NULL);
executable_size = sljit_get_generated_code_size(compiler);
Expand Down
Loading
Loading