From 85bfd9c0cd5dc3acb3d5c824f82307ff4afad118 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 11 Dec 2024 15:54:56 -0800 Subject: [PATCH] Decode %r like % strings %r regular expressions need to be decoded like strings. This commit fixes %r decoding so it works like strings. --- src/prism.c | 32 +++++++++++--- test/prism/percent_delimiter_string_test.rb | 48 +++++++++++++++------ 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/prism.c b/src/prism.c index 7f30349257..d98a5cd16f 100644 --- a/src/prism.c +++ b/src/prism.c @@ -12115,9 +12115,28 @@ parser_lex(pm_parser_t *parser) { pm_regexp_token_buffer_t token_buffer = { 0 }; while (breakpoint != NULL) { + uint8_t term = lex_mode->as.regexp.terminator; + bool is_terminator = (*breakpoint == term); + + // If the terminator is newline, we need to consider \r\n _also_ a newline + // For example: `%r\nfoo\r\n` + // The regexp should be /foo/, not /foo\r/ + if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') { + if (term == '\n') { + is_terminator = true; + } + + // If the terminator is a CR, but we see a CRLF, we need to + // treat the CRLF as a newline, meaning this is _not_ the + // terminator + if (term == '\r') { + is_terminator = false; + } + } + // If we hit the terminator, we need to determine what kind of // token to return. 
- if (*breakpoint == lex_mode->as.regexp.terminator) { + if (is_terminator) { if (lex_mode->as.regexp.nesting > 0) { parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); @@ -12347,20 +12366,21 @@ parser_lex(pm_parser_t *parser) { continue; } - bool is_terminator = (*breakpoint == lex_mode->as.string.terminator); + uint8_t term = lex_mode->as.string.terminator; + bool is_terminator = (*breakpoint == term); // If the terminator is newline, we need to consider \r\n _also_ a newline - // For example: `%\nfoo\r\n` - // The string should be "foo", not "foo\r" + // For example, given the % string `%\nfoo\r\n`, + // the string should be "foo", not "foo\r" if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') { - if (lex_mode->as.string.terminator == '\n') { + if (term == '\n') { is_terminator = true; } // If the terminator is a CR, but we see a CRLF, we need to // treat the CRLF as a newline, meaning this is _not_ the // terminator - if (lex_mode->as.string.terminator == '\r') { + if (term == '\r') { is_terminator = false; } } diff --git a/test/prism/percent_delimiter_string_test.rb b/test/prism/percent_delimiter_string_test.rb index 4cf5990dcf..6fd825ad06 100644 --- a/test/prism/percent_delimiter_string_test.rb +++ b/test/prism/percent_delimiter_string_test.rb @@ -3,56 +3,80 @@ require_relative "test_helper" module Prism - class PercentDelimiterStringTest < TestCase + module PercentDelimiterTests def test_newline_terminator_with_lf_crlf - str = "%\n123456\r\n" + str = l "\n123456\r\n" assert_parse "123456", str end def test_newline_terminator_with_lf_crlf_with_extra_cr - str = "%\n123456\r\r\n" + str = l "\n123456\r\r\n" assert_parse "123456\r", str end def test_newline_terminator_with_crlf_pair - str = "%\r\n123456\r\n" + str = l "\r\n123456\r\n" assert_parse "123456", str end def test_newline_terminator_with_crlf_crlf_with_extra_cr - str = "%\r\n123456\r\r\n" + str = l 
"\r\n123456\r\r\n" assert_parse "123456\r", str end def test_newline_terminator_with_cr_cr - str = "%\r123456\r;\n" + str = l "\r123456\r;\n" assert_parse "123456", str end def test_newline_terminator_with_crlf_lf - str = "%\r\n123456\n;\n" + str = l "\r\n123456\n;\n" assert_parse "123456", str end def test_cr_crlf - str = "%\r1\r\n \r" + str = l "\r1\r\n \r" assert_parse "1\n ", str end def test_lf_crlf - str = "%\n1\r\n \n" + str = l "\n1\r\n \n" assert_parse "1", str end def test_lf_lf - str = "%\n1\n \n" + str = l "\n1\n \n" assert_parse "1", str end def assert_parse(expected, str) + assert_equal expected, find_node(str).unescaped + end + end + + class PercentDelimiterStringTest < TestCase + include PercentDelimiterTests + + def find_node(str) + tree = Prism.parse str + tree.value.breadth_first_search { |x| Prism::StringNode === x } + end + + def l(str) + "%" + str + end + end + + class PercentDelimiterRegexpTest < TestCase + include PercentDelimiterTests + + def l(str) + "%r" + str + end + + def find_node(str) tree = Prism.parse str - node = tree.value.breadth_first_search { |x| Prism::StringNode === x } - assert_equal expected, node.unescaped + tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x } end end end