From 5ffe5ae7eb3d037c28843dacbdf4a925d8ced86a Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 26 Sep 2023 11:47:28 +0530 Subject: [PATCH] Add `cell` field to JSON output format --- crates/ruff_linter/src/message/json.rs | 68 ++++++++++++++++---- crates/ruff_linter/src/message/json_lines.rs | 4 +- crates/ruff_notebook/src/index.rs | 14 ++++ crates/ruff_source_file/src/lib.rs | 4 ++ crates/ruff_source_file/src/line_index.rs | 14 ++++ 5 files changed, 91 insertions(+), 13 deletions(-) diff --git a/crates/ruff_linter/src/message/json.rs b/crates/ruff_linter/src/message/json.rs index e1a2e3e36df988..7c783ab12341b0 100644 --- a/crates/ruff_linter/src/message/json.rs +++ b/crates/ruff_linter/src/message/json.rs @@ -5,7 +5,8 @@ use serde::{Serialize, Serializer}; use serde_json::{json, Value}; use ruff_diagnostics::Edit; -use ruff_source_file::SourceCode; +use ruff_notebook::NotebookIndex; +use ruff_source_file::{SourceCode, SourceLocation}; use ruff_text_size::Ranged; use crate::message::{Emitter, EmitterContext, Message}; @@ -19,9 +20,9 @@ impl Emitter for JsonEmitter { &mut self, writer: &mut dyn Write, messages: &[Message], - _context: &EmitterContext, + context: &EmitterContext, ) -> anyhow::Result<()> { - serde_json::to_writer_pretty(writer, &ExpandedMessages { messages })?; + serde_json::to_writer_pretty(writer, &ExpandedMessages { messages, context })?; Ok(()) } @@ -29,6 +30,7 @@ impl Emitter for JsonEmitter { struct ExpandedMessages<'a> { messages: &'a [Message], + context: &'a EmitterContext<'a>, } impl Serialize for ExpandedMessages<'_> { @@ -39,7 +41,7 @@ impl Serialize for ExpandedMessages<'_> { let mut s = serializer.serialize_seq(Some(self.messages.len()))?; for message in self.messages { - let value = message_to_json_value(message); + let value = message_to_json_value(message, self.context); s.serialize_element(&value)?; } @@ -47,26 +49,36 @@ impl Serialize for ExpandedMessages<'_> { } } -pub(crate) fn message_to_json_value(message: &Message) -> Value { +pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) -> Value { let source_code = message.file.to_source_code(); + let notebook_index = context.notebook_index(message.filename()); let fix = message.fix.as_ref().map(|fix| { json!({ "applicability": fix.applicability(), "message": message.kind.suggestion.as_deref(), - "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code }, + "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code, notebook_index }, }) }); - let start_location = source_code.source_location(message.start()); - let end_location = source_code.source_location(message.end()); - let noqa_location = source_code.source_location(message.noqa_offset); + let mut start_location = source_code.source_location(message.start()); + let mut end_location = source_code.source_location(message.end()); + let mut noqa_location = source_code.source_location(message.noqa_offset); + let mut notebook_cell_index = None; + + if let Some(notebook_index) = notebook_index { + notebook_cell_index = Some(notebook_index.cell(start_location.row.get()).unwrap_or(1)); + start_location = notebook_index.translated_location(&start_location); + end_location = notebook_index.translated_location(&end_location); + noqa_location = notebook_index.translated_location(&noqa_location); + } json!({ "code": message.kind.rule().noqa_code().to_string(), "url": message.kind.rule().url(), "message": message.kind.body, "fix": fix, + "cell": notebook_cell_index, "location": start_location, "end_location": end_location, "filename": message.filename(), @@ -77,6 +89,7 @@ pub(crate) fn message_to_json_value(message: &Message) -> Value { struct ExpandedEdits<'a> { edits: &'a [Edit], source_code: &'a SourceCode<'a, 'a>, + notebook_index: Option<&'a NotebookIndex>, } impl Serialize for ExpandedEdits<'_> { @@ -87,10 +100,43 @@ impl Serialize for ExpandedEdits<'_> { let mut s = serializer.serialize_seq(Some(self.edits.len()))?; for edit in self.edits { + let mut location = self.source_code.source_location(edit.start()); + let mut end_location = self.source_code.source_location(edit.end()); + + if let Some(notebook_index) = self.notebook_index { + location = notebook_index.translated_location(&location); + match ( + notebook_index.cell(location.row.get()), + notebook_index.cell(end_location.row.get()), + ) { + (Some(start_cell), Some(end_cell)) if start_cell != end_cell => { + debug_assert_eq!(end_location.column.get(), 1); + + let prev_row = end_location.row.saturating_sub(1); + end_location = SourceLocation { + row: prev_row, + column: self + .source_code + .source_location(self.source_code.line_end_exclusive(prev_row)) + .column, + }; + } + (Some(_start_cell), None) => { + debug_assert_eq!(end_location.column.get(), 1); + // No translation for the last cell edit whose end location + // is at the first character of the next cell (which doesn't + // exists). + } + _ => { + end_location = notebook_index.translated_location(&end_location); + } + } + } + let value = json!({ "content": edit.content().unwrap_or_default(), - "location": self.source_code.source_location(edit.start()), - "end_location": self.source_code.source_location(edit.end()) + "location": location, + "end_location": end_location }); s.serialize_element(&value)?; diff --git a/crates/ruff_linter/src/message/json_lines.rs b/crates/ruff_linter/src/message/json_lines.rs index 360e7ec6a71a76..048cb07fd7f86e 100644 --- a/crates/ruff_linter/src/message/json_lines.rs +++ b/crates/ruff_linter/src/message/json_lines.rs @@ -11,11 +11,11 @@ impl Emitter for JsonLinesEmitter { &mut self, writer: &mut dyn Write, messages: &[Message], - _context: &EmitterContext, + context: &EmitterContext, ) -> anyhow::Result<()> { let mut w = writer; for message in messages { - serde_json::to_writer(&mut w, &message_to_json_value(message))?; + serde_json::to_writer(&mut w, &message_to_json_value(message, context))?; w.write_all(b"\n")?; } Ok(()) diff --git a/crates/ruff_notebook/src/index.rs b/crates/ruff_notebook/src/index.rs index 23259468ee56f4..25890bc25c6075 100644 --- a/crates/ruff_notebook/src/index.rs +++ b/crates/ruff_notebook/src/index.rs @@ -1,5 +1,7 @@ use serde::{Deserialize, Serialize}; +use ruff_source_file::{OneIndexed, SourceLocation}; + /// Jupyter Notebook indexing table /// /// When we lint a jupyter notebook, we have to translate the row/column based on @@ -23,4 +25,16 @@ impl NotebookIndex { pub fn cell_row(&self, row: usize) -> Option { self.row_to_row_in_cell.get(row).copied() } + + /// Translates the given source location based on the indexing table. + /// + /// This will translate the row/column in the concatenated source code + /// to the row/column in the Jupyter Notebook. + pub fn translated_location(&self, source_location: &SourceLocation) -> SourceLocation { + SourceLocation { + row: OneIndexed::new(self.cell_row(source_location.row.get()).unwrap_or(1) as usize) + .unwrap(), + column: source_location.column, + } + } } diff --git a/crates/ruff_source_file/src/lib.rs b/crates/ruff_source_file/src/lib.rs index dd375f7e242dc6..fc2a10108de691 100644 --- a/crates/ruff_source_file/src/lib.rs +++ b/crates/ruff_source_file/src/lib.rs @@ -68,6 +68,10 @@ impl<'src, 'index> SourceCode<'src, 'index> { self.index.line_end(line, self.text) } + pub fn line_end_exclusive(&self, line: OneIndexed) -> TextSize { + self.index.line_end_exclusive(line, self.text) + } + pub fn line_range(&self, line: OneIndexed) -> TextRange { self.index.line_range(line, self.text) } diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index 9bf7b20985558d..279e68dfb84d0e 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -184,6 +184,20 @@ impl LineIndex { } } + /// Returns the [byte offset](TextSize) of the `line`'s end. + /// The offset is the end of the line, excluding the newline character ending the line (if any). + pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize { + let row_index = line.to_zero_indexed(); + let starts = self.line_starts(); + + // If start-of-line position after last line + if row_index.saturating_add(1) >= starts.len() { + contents.text_len() + } else { + starts[row_index + 1] - TextSize::new(1) + } + } + /// Returns the [`TextRange`] of the `line` with the given index. /// The start points to the first character's [byte offset](TextSize), the end up to, and including /// the newline character ending the line (if any).