kiyoon · kiyoon · Mar 13, 2023 · Mar 13, 2023 · Mar 13, 2023 · Mar 13, 2023
diff --git a/README.md b/README.md
@@ -266,7 +266,7 @@ For example:
 
 ### Setup a Jupynium file
 
-Jupynium uses a unique file format (see the `Jupynium file format` section below). This `.ju.py` file is what you will primarily be interacting with, rather than the `.ipynb` file directly. The contents of the Jupynium file are synced to the browser notebook where it can be viewed in real-time. If you want to keep a copy of the notebook, it can be downloaded as an `.ipynb` file later.
+Jupynium uses Jupytext's percent format (see the `Jupynium file format` section below). This Jupytext file, named `*.ju.py`, is what you will primarily be interacting with, rather than the `.ipynb` file directly. The contents of the Jupynium file are synced to the browser notebook, where they can be viewed in real time. If you want to keep a copy of the notebook, it can be downloaded as an `.ipynb` file later.
 
 First, it's recommended to set a password on your notebook (rather than using tokens):
 
@@ -289,9 +289,11 @@ There are currently 2 ways of converting an existing `.ipynb` file to a Jupynium
 **Option 1**: Use an included command line tool:
 
 ```bash
-ipynb2jupy [-h] [--stdout] file.ipynb [file.ju.py]
+ipynb2jupytext [-h] [--stdout] file.ipynb [file.ju.py]
 ```
 
+If you're already familiar with Jupytext, feel free to use it instead.
+
 **Option 2**: This method requires that you have already connected to the Jupynium server:
 
 1. Open your `.ipynb` file in the web browser after connecting to the server
@@ -334,36 +336,51 @@ If you have `auto_attach_to_server = false` during setup, you need to run `:Jupy
 
 ## 📝 Jupynium file format (.ju.py or .ju.\*)
 
-The file format is designed to be LSP friendly even with markdown code injected into it. The markdown cells will be part of a Python string: `"""%%` ... `%%"""`.
+The Jupynium file format follows Jupytext's percent format. In order for Jupynium to detect the files, name them as `*.ju.py` or specify `jupynium_file_pattern` in `require("jupynium").setup()`.
+
+**Code cell:**  
+Any code below this line (and before the next separator) will be the content of a code cell.
 
-**Code cell separators:**  
-i.e. Any code below this line (and before the next separator) will be a code cell.
+- `# %%`
 
-- `# %%`: recommended
-- `%%"""`: use when you want to close a markdown cell
-- `%%'''`
+**Magic commands**
+
+- `# %time` becomes `%time` in notebook.
+- If you really want to comment out a magic command, make sure the line does not start with `# %`. For example:
+  - `## %time`
+  - `#%time`
 
-**Markdown cell separators:**
+**Markdown cell:**  
+Any text below this line (and before the next separator) will be the content of a markdown cell.
 
-- `"""%%`: recommended
-- `'''%%`
-- `# %%%`
 - `# %% [md]`
 - `# %% [markdown]`
 
+In Python, the recommended way is to wrap the whole cell content as a multi-line string.
+
+```python
+# %% [md]
+"""
+# This is a markdown heading
+This is markdown content
+"""
+```
+
+In other languages like R, you'll need to comment every line.
+
+```r
+# %% [md]
+# # This is a markdown heading
+# This is markdown content
+```
+
 **Explicitly specify the first cell separator to use it like a notebook.**
 
 - If there is one or more cells, it works as a notebook mode.
   - Contents before the first cell are ignored, so use it as a heading (shebang etc.)
 - If there is no cell, it works as a markdown preview mode.
   - It will still open ipynb file but will one have one markdown cell.
 
-**Magic commands**
-
-- `# %time` becomes `%time` in notebook.
-- If you want to really comment out magic commands, make the line not start with `# %`. For example,
-  - `## %time`
-  - `#%time`
 
 ## ⌨️ Keybindings
 

diff --git a/after/queries/python/highlights.scm b/after/queries/python/highlights.scm
@@ -0,0 +1,19 @@
+; extends
+
+(expression_statement
+ ((string) @_var @variable)
+ (#match? @_var "^[\"']{3}[%]{2}.*[%]{2}[\"']{3}$")
+)
+
+; it can be # %% [markdown] or # %% [md]
+((
+  (comment) @_mdcomment
+  . (expression_statement 
+      (string (string_content) @variable)))
+  (#lua-match? @_mdcomment "^# %%%% %[markdown%]"))
+
+((
+  (comment) @_mdcomment
+  . (expression_statement 
+      (string (string_content) @variable)))
+  (#lua-match? @_mdcomment "^# %%%% %[md%]"))
diff --git a/after/queries/python/injections.scm b/after/queries/python/injections.scm
@@ -7,3 +7,16 @@
  ((string) @markdown @markdown_inline)
  (#match? @markdown_inline "^[\"']{3}[%]{2}.*[%]{2}[\"']{3}$")
 )
+
+; it can be # %% [markdown] or # %% [md]
+((
+  (comment) @_mdcomment
+  . (expression_statement 
+      (string (string_content) @markdown @markdown_inline)))
+  (#lua-match? @_mdcomment "^# %%%% %[markdown%]"))
+
+((
+  (comment) @_mdcomment
+  . (expression_statement 
+      (string (string_content) @markdown @markdown_inline)))
+  (#lua-match? @_mdcomment "^# %%%% %[md%]"))
diff --git a/src/jupynium/buffer.py b/src/jupynium/buffer.py
@@ -14,6 +14,21 @@
 )
 
 
+def _process_cell_type(cell_type: str) -> str:
+    if cell_type == "markdown (jupytext)":
+        return "markdown"
+
+    return cell_type
+
+
+def _process_cell_types(cell_types: list[str]) -> list[str]:
+    """
+    Return the cell types, e.g. ["markdown", "code", "header"].
+    markdown (jupytext) is converted to markdown.
+    """
+    return [_process_cell_type(cell_type) for cell_type in cell_types]
+
+
 class JupyniumBuffer:
     """
     This class mainly deals with the Nvim buffer and its cell information.
@@ -27,10 +42,6 @@ def __init__(
     ):
         """
         self.buf is a list of lines of the nvim buffer,
-        with the exception that the commented magic commands are normal magic commands.
-        e.g. '# %time' -> '%time'
-        and jupytext markdown cell content also strips the leading comment.
-        e.g. '# # Markdown header' -> '# Markdown header'
 
         Args:
             header_cell_type (str, optional): Use only when partial update.
@@ -58,9 +69,10 @@ def full_analyse_buf(self, header_cell_type="header"):
         During the partial update, the header cell will be continuation from the existing cell.
         We don't know if it will be header/cell/markdown.
         So we need to pass the header_cell_type.
+        (This is deprecated, in favour of self.get_cells_text() dealing with content processing.)
 
         Args:
-            header_cell_type (str, optional): Use only when partial update.
+            header_cell_type (str, optional): Previously used only for partial updates. Now deprecated.
         """
         num_rows_this_cell = 0
         num_rows_per_cell = []
@@ -86,33 +98,80 @@ def full_analyse_buf(self, header_cell_type="header"):
                 num_rows_per_cell.append(num_rows_this_cell)
                 num_rows_this_cell = 1
                 cell_types.append("code")
-            elif line.startswith("# %"):
-                # Use '# %' for magic commands
-                # e.g. '# %matplotlib inline'
-                # Remove the comment
-                if cell_types[-1] == "code":
-                    self.buf[row] = self.buf[row][2:]
-                num_rows_this_cell += 1
-            elif line.startswith("# "):
-                # Remove the comment for markdown cells
-                # Only activated if the cell separator is like Jupytext's
-                # Useful for non-python languages like R
-                if cell_types[-1] == "markdown (jupytext)":
-                    self.buf[row] = self.buf[row][2:]
-                num_rows_this_cell += 1
-            elif line == '"""':
-                # Remove the comment for markdown cells
-                # Only activated if the cell separator is like Jupytext's
-                if cell_types[-1] == "markdown (jupytext)":
-                    self.buf[row] = ""
-                num_rows_this_cell += 1
             else:
                 num_rows_this_cell += 1
         num_rows_per_cell.append(num_rows_this_cell)
 
         self.num_rows_per_cell = num_rows_per_cell
         self.cell_types = cell_types
 
+    def _process_cell_text(self, cell_type, lines: list[str]):
+        """
+        Assuming that lines is just one cell's content, process it.
+        """
+        if cell_type == "code":
+            return "\n".join(
+                line[2:] if line.startswith("# %") else line for line in lines
+            )
+        elif cell_type == "markdown (jupytext)":
+            if len(lines) > 0 and lines[0] == '"""':
+                return "\n".join(line for line in lines if not line.startswith('"""'))
+            else:
+                return "\n".join(
+                    line[2:] if line.startswith("# ") else line for line in lines
+                )
+        else:
+            # header, markdown
+            return "\n".join(lines)
+
+    def get_cells_text(
+        self, start_cell_idx: int, end_cell_idx: int, strip: bool = True
+    ) -> list[str]:
+        """
+        Get processed cell text.
+        In a code cell, remove comments for the magic commands.
+        e.g. '# %time' -> '%time'
+        In a markdown cell, remove the leading # from the lines or multiline string.
+        e.g. '# # Markdown header' -> '# Markdown header'
+        """
+
+        if start_cell_idx == 0:
+            start_row_offset = 0
+        else:
+            start_row_offset = 1
+
+        texts_per_cell = []
+        start_row = self.get_cell_start_row(start_cell_idx)
+        texts_per_cell.append(
+            self._process_cell_text(
+                self.cell_types[start_cell_idx],
+                self.buf[
+                    start_row
+                    + start_row_offset : start_row
+                    + self.num_rows_per_cell[start_cell_idx]
+                ],
+            )
+        )
+
+        for cell_idx in range(start_cell_idx + 1, end_cell_idx + 1):
+            start_row += self.num_rows_per_cell[cell_idx - 1]
+            texts_per_cell.append(
+                self._process_cell_text(
+                    self.cell_types[cell_idx],
+                    self.buf[
+                        start_row + 1 : start_row + self.num_rows_per_cell[cell_idx]
+                    ],
+                )
+            )
+
+        if strip:
+            texts_per_cell = [x.strip() for x in texts_per_cell]
+
+        return texts_per_cell
+
+    def get_cell_text(self, cell_idx: int, strip: bool = True) -> str:
+        return self.get_cells_text(cell_idx, cell_idx, strip=strip)[0]
+
     def process_on_lines(
         self, driver, strip, lines, start_row, old_end_row, new_end_row
     ):
@@ -196,23 +255,31 @@ def _on_lines_update_buf(self, lines, start_row, old_end_row, new_end_row):
                     (
                         "cell_type",
                         cell_idx + 1,
-                        new_lines_buf.cell_types[
-                            1 : 1 + len(notebook_cell_delete_operations)
-                        ],
+                        _process_cell_types(
+                            new_lines_buf.cell_types[
+                                1 : 1 + len(notebook_cell_delete_operations)
+                            ]
+                        ),
                     )
                 ]
                 notebook_cell_operations.append(
                     (
                         "insert",
                         cell_idx + 1,
-                        new_lines_buf.cell_types[
-                            1 + len(notebook_cell_delete_operations) :
-                        ],
+                        _process_cell_types(
+                            new_lines_buf.cell_types[
+                                1 + len(notebook_cell_delete_operations) :
+                            ]
+                        ),
                     )
                 )
             else:
                 notebook_cell_operations = [
-                    ("cell_type", cell_idx + 1, new_lines_buf.cell_types[1:])
+                    (
+                        "cell_type",
+                        cell_idx + 1,
+                        _process_cell_types(new_lines_buf.cell_types[1:]),
+                    )
                 ]
 
             num_tail_rows = self.num_rows_per_cell[cell_idx] - row_within_cell
@@ -259,7 +326,7 @@ def _apply_cell_operations(self, driver, notebook_cell_operations):
                         f"Cell {nb_cell_idx + i} type change to {cell_type} from Notebook"
                     )
                     # "markdown" or "markdown (jupytext)"
-                    if cell_type.startswith("markdown"):
+                    if cell_type == "markdown":
                         driver.execute_script(
                             "Jupyter.notebook.cells_to_markdown([arguments[0]]);",
                             nb_cell_idx + i,
@@ -346,30 +413,7 @@ def _partial_sync_to_notebook(
             if start_cell_idx == 0:
                 start_cell_idx = 1
 
-            texts_per_cell = []
-            start_row = self.get_cell_start_row(start_cell_idx)
-            texts_per_cell.append(
-                "\n".join(
-                    self.buf[
-                        start_row
-                        + 1 : start_row
-                        + self.num_rows_per_cell[start_cell_idx]
-                    ]
-                )
-            )
-
-            for cell_idx in range(start_cell_idx + 1, end_cell_idx + 1):
-                start_row += self.num_rows_per_cell[cell_idx - 1]
-                texts_per_cell.append(
-                    "\n".join(
-                        self.buf[
-                            start_row + 1 : start_row + self.num_rows_per_cell[cell_idx]
-                        ]
-                    )
-                )
-
-            if strip:
-                texts_per_cell = [x.strip() for x in texts_per_cell]
+            texts_per_cell = self.get_cells_text(start_cell_idx, end_cell_idx, strip)
 
             code_cell_indices = [
                 start_cell_idx + i

diff --git a/src/jupynium/cmds/ipynb2jupy.py b/src/jupynium/cmds/ipynb2jupy.py
@@ -9,7 +9,7 @@
 
 def get_parser():
     parser = argparse.ArgumentParser(
-        description="Convert ipynb to a jupynium file (.ju.py)",
+        description="Convert ipynb to a jupynium file (.ju.py). Deprecated: use ipynb2jupytext instead.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     parser.add_argument("ipynb_path", help="Path to ipynb file")