From dd7d239ded2d46e39ed94fe753d34bd4496a68ca Mon Sep 17 00:00:00 2001 From: ikappaki <34983288+ikappaki@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:02:24 +0100 Subject: [PATCH] Fix a bug where the reader was double counting the CRLF newline seq (#1064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hi, can you please review a fix for the line number in the metadata generated by the reader. It fixes #1063. We now treat the char at -2 as a newline only if it’s: - `\n` (for `\nC` or `\r\nC` case) - `\r` (for the `\rC`, but not `\r\n`, case), where `C` is any other char. Added tests for the same. Thanks --------- Co-authored-by: ikappaki --- CHANGELOG.md | 3 +++ src/basilisp/lang/reader.py | 4 +++- tests/basilisp/reader_test.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 517bcedc..c994f8bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + * Fix a bug where the reader was double counting the CRLF newline seq in metadata (#1063) + ## [v0.2.3] ### Added * Added a compiler metadata flag for suppressing warnings when Var indirection is unavoidable (#1052) diff --git a/src/basilisp/lang/reader.py b/src/basilisp/lang/reader.py index 2c7c1418..2aba5143 100644 --- a/src/basilisp/lang/reader.py +++ b/src/basilisp/lang/reader.py @@ -277,7 +277,9 @@ def loc(self) -> Tuple[int, int]: def _update_loc(self): """Update the internal line and column buffers after a new character is added.""" - if newline_chars.match(self._buffer[-2]): + if self._buffer[-2] == "\n" or ( + self._buffer[-2] == "\r" and self._buffer[-1] != "\n" + ): self._col.append(0) self._line.append(self._line[-1] + 1) else: diff --git a/tests/basilisp/reader_test.py b/tests/basilisp/reader_test.py index d0510e0e..e561b5e9 100644 --- a/tests/basilisp/reader_test.py +++ 
b/tests/basilisp/reader_test.py @@ -198,6 +198,35 @@ def test_reader_lines_from_str_other_loc(self, tmp_path): l3.meta.get(reader.READER_END_COL_KW), ) + @pytest.mark.parametrize( + "evalstr,first,second", + [ + ("[5]\n(def n 123)", (1, 1, 0, 3), (2, 2, 0, 11)), + ("[5]\r(def n 123)", (1, 1, 0, 3), (2, 2, 0, 11)), + ("[5]\r\n(def n 123)", (1, 1, 0, 3), (2, 2, 0, 11)), + ("[5]\n\n(def n 123)", (1, 1, 0, 3), (3, 3, 0, 11)), + ("[5]\r\r(def n 123)", (1, 1, 0, 3), (3, 3, 0, 11)), + ("[5]\r\n\r\n(def n 123)", (1, 1, 0, 3), (3, 3, 0, 11)), + ("\n[5]\n(def n 123)", (2, 2, 0, 3), (3, 3, 0, 11)), + ("\r[5]\r(def n 123)", (2, 2, 0, 3), (3, 3, 0, 11)), + ("\r\n[5]\r\n(def n 123)", (2, 2, 0, 3), (3, 3, 0, 11)), + ], + ) + def test_reader_newlines_from_str(self, evalstr, first, second): + l0, l1 = list(reader.read_str(evalstr)) + assert first == ( + l0.meta.get(reader.READER_LINE_KW), + l0.meta.get(reader.READER_END_LINE_KW), + l0.meta.get(reader.READER_COL_KW), + l0.meta.get(reader.READER_END_COL_KW), + ) + assert second == ( + l1.meta.get(reader.READER_LINE_KW), + l1.meta.get(reader.READER_END_LINE_KW), + l1.meta.get(reader.READER_COL_KW), + l1.meta.get(reader.READER_END_COL_KW), + ) + def test_reader_lines_from_file(self, tmp_path): filename = tmp_path / "test.lpy"