Skip to content

Commit

Permalink
Decode %r like % strings
Browse files Browse the repository at this point in the history
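%r regular expression literals need their terminators decoded the same way
as % string literals: when the terminator is a newline, a CRLF also ends the
literal, and when the terminator is a CR, a CRLF does not. This commit fixes
%r decoding so it works like strings.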
  • Loading branch information
tenderlove committed Dec 11, 2024
1 parent 88c71b8 commit 85bfd9c
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 18 deletions.
32 changes: 26 additions & 6 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -12115,9 +12115,28 @@ parser_lex(pm_parser_t *parser) {
pm_regexp_token_buffer_t token_buffer = { 0 };

while (breakpoint != NULL) {
uint8_t term = lex_mode->as.regexp.terminator;
bool is_terminator = (*breakpoint == term);

// If the terminator is newline, we need to consider \r\n _also_ a newline
// For example: `%r\nfoo\r\n`
// The regular expression should be /foo/, not /foo\r/
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
if (term == '\n') {
is_terminator = true;
}

// If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the
// terminator
if (term == '\r') {
is_terminator = false;
}
}

// If we hit the terminator, we need to determine what kind of
// token to return.
if (*breakpoint == lex_mode->as.regexp.terminator) {
if (is_terminator) {
if (lex_mode->as.regexp.nesting > 0) {
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
Expand Down Expand Up @@ -12347,20 +12366,21 @@ parser_lex(pm_parser_t *parser) {
continue;
}

bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
uint8_t term = lex_mode->as.string.terminator;
bool is_terminator = (*breakpoint == term);

// If the terminator is newline, we need to consider \r\n _also_ a newline
// For example: `%\nfoo\r\n`
// The string should be "foo", not "foo\r"
// For example: `%r\nfoo\r\n`
// The string should be /foo/, not /foo\r/
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
if (lex_mode->as.string.terminator == '\n') {
if (term == '\n') {
is_terminator = true;
}

// If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the
// terminator
if (lex_mode->as.string.terminator == '\r') {
if (term == '\r') {
is_terminator = false;
}
}
Expand Down
48 changes: 36 additions & 12 deletions test/prism/percent_delimiter_string_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,80 @@
require_relative "test_helper"

module Prism
class PercentDelimiterStringTest < TestCase
module PercentDelimiterTests
def test_newline_terminator_with_lf_crlf
str = "%\n123456\r\n"
str = l "\n123456\r\n"
assert_parse "123456", str
end

def test_newline_terminator_with_lf_crlf_with_extra_cr
str = "%\n123456\r\r\n"
str = l "\n123456\r\r\n"
assert_parse "123456\r", str
end

def test_newline_terminator_with_crlf_pair
str = "%\r\n123456\r\n"
str = l "\r\n123456\r\n"
assert_parse "123456", str
end

def test_newline_terminator_with_crlf_crlf_with_extra_cr
str = "%\r\n123456\r\r\n"
str = l "\r\n123456\r\r\n"
assert_parse "123456\r", str
end

def test_newline_terminator_with_cr_cr
str = "%\r123456\r;\n"
str = l "\r123456\r;\n"
assert_parse "123456", str
end

def test_newline_terminator_with_crlf_lf
str = "%\r\n123456\n;\n"
str = l "\r\n123456\n;\n"
assert_parse "123456", str
end

def test_cr_crlf
str = "%\r1\r\n \r"
str = l "\r1\r\n \r"
assert_parse "1\n ", str
end

def test_lf_crlf
str = "%\n1\r\n \n"
str = l "\n1\r\n \n"
assert_parse "1", str
end

def test_lf_lf
str = "%\n1\n \n"
str = l "\n1\n \n"
assert_parse "1", str
end

def assert_parse(expected, str)
assert_equal expected, find_node(str).unescaped
end
end

class PercentDelimiterStringTest < TestCase
include PercentDelimiterTests

def find_node(str)
tree = Prism.parse str
tree.value.breadth_first_search { |x| Prism::StringNode === x }
end

def l(str)
"%" + str
end
end

class PercentDelimiterRegexpTest < TestCase
include PercentDelimiterTests

def l(str)
"%r" + str
end

def find_node(str)
tree = Prism.parse str
node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
assert_equal expected, node.unescaped
tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
end
end
end

0 comments on commit 85bfd9c

Please sign in to comment.