From 85bfd9c0cd5dc3acb3d5c824f82307ff4afad118 Mon Sep 17 00:00:00 2001
From: Aaron Patterson <tenderlove@ruby-lang.org>
Date: Wed, 11 Dec 2024 15:54:56 -0800
Subject: [PATCH] Decode %r like % strings

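%r regular expressions need to be decoded like % strings when the
terminator is a newline: a CRLF must close a literal whose terminator
is a newline, and must not close one whose terminator is a lone CR.
This commit makes the regexp lexer apply the same terminator check
that the string lexer already uses.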
---
 src/prism.c                                 | 32 +++++++++++---
 test/prism/percent_delimiter_string_test.rb | 48 +++++++++++++++------
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/src/prism.c b/src/prism.c
index 7f30349257..d98a5cd16f 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -12115,9 +12115,28 @@ parser_lex(pm_parser_t *parser) {
             pm_regexp_token_buffer_t token_buffer = { 0 };
 
             while (breakpoint != NULL) {
+                uint8_t term = lex_mode->as.regexp.terminator;
+                bool is_terminator = (*breakpoint == term);
+
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%r\nfoo\r\n`
+                // The regexp should be /foo/, not /foo\r/
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
+
                 // If we hit the terminator, we need to determine what kind of
                 // token to return.
-                if (*breakpoint == lex_mode->as.regexp.terminator) {
+                if (is_terminator) {
                     if (lex_mode->as.regexp.nesting > 0) {
                         parser->current.end = breakpoint + 1;
                         breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12347,20 +12366,21 @@ parser_lex(pm_parser_t *parser) {
                     continue;
                 }
 
-                bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
+                uint8_t term = lex_mode->as.string.terminator;
+                bool is_terminator = (*breakpoint == term);
 
                 // If the terminator is newline, we need to consider \r\n _also_ a newline
                 // For example: `%\nfoo\r\n`
                 // The string should be "foo", not "foo\r"
                 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
-                    if (lex_mode->as.string.terminator == '\n') {
+                    if (term == '\n') {
                         is_terminator = true;
                     }
 
                     // If the terminator is a CR, but we see a CRLF, we need to
                     // treat the CRLF as a newline, meaning this is _not_ the
                     // terminator
-                    if (lex_mode->as.string.terminator == '\r') {
+                    if (term == '\r') {
                         is_terminator = false;
                     }
                 }
diff --git a/test/prism/percent_delimiter_string_test.rb b/test/prism/percent_delimiter_string_test.rb
index 4cf5990dcf..6fd825ad06 100644
--- a/test/prism/percent_delimiter_string_test.rb
+++ b/test/prism/percent_delimiter_string_test.rb
@@ -3,56 +3,80 @@
 require_relative "test_helper"
 
 module Prism
-  class PercentDelimiterStringTest < TestCase
+  module PercentDelimiterTests
     def test_newline_terminator_with_lf_crlf
-      str = "%\n123456\r\n"
+      str = l "\n123456\r\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_lf_crlf_with_extra_cr
-      str = "%\n123456\r\r\n"
+      str = l "\n123456\r\r\n"
       assert_parse "123456\r", str
     end
 
     def test_newline_terminator_with_crlf_pair
-      str = "%\r\n123456\r\n"
+      str = l "\r\n123456\r\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_crlf_crlf_with_extra_cr
-      str = "%\r\n123456\r\r\n"
+      str = l "\r\n123456\r\r\n"
       assert_parse "123456\r", str
     end
 
     def test_newline_terminator_with_cr_cr
-      str = "%\r123456\r;\n"
+      str = l "\r123456\r;\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_crlf_lf
-      str = "%\r\n123456\n;\n"
+      str = l "\r\n123456\n;\n"
       assert_parse "123456", str
     end
 
     def test_cr_crlf
-      str = "%\r1\r\n \r"
+      str = l "\r1\r\n \r"
       assert_parse "1\n ", str
     end
 
     def test_lf_crlf
-      str = "%\n1\r\n \n"
+      str = l "\n1\r\n \n"
       assert_parse "1", str
     end
 
     def test_lf_lf
-      str = "%\n1\n \n"
+      str = l "\n1\n \n"
       assert_parse "1", str
     end
 
     def assert_parse(expected, str)
+      assert_equal expected, find_node(str).unescaped
+    end
+  end
+
+  class PercentDelimiterStringTest < TestCase
+    include PercentDelimiterTests
+
+    def find_node(str)
+      tree = Prism.parse str
+      tree.value.breadth_first_search { |x| Prism::StringNode === x }
+    end
+
+    def l(str)
+      "%" + str
+    end
+  end
+
+  class PercentDelimiterRegexpTest < TestCase
+    include PercentDelimiterTests
+
+    def l(str)
+      "%r" + str
+    end
+
+    def find_node(str)
       tree = Prism.parse str
-      node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
-      assert_equal expected, node.unescaped
+      tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
     end
   end
 end