Python representation is pep8 when notebook cells are #154

mwouts · Jan 18, 2019 · 421b5f1 · 421b5f1
1 parent 205686a
commit 421b5f1
Show file tree

Hide file tree

Showing 21 changed files with 157 additions and 71 deletions.
diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
@@ -13,6 +13,7 @@
     double_percent_options_to_metadata
 from .stringparser import StringParser
 from .magics import uncomment_magic, is_magic, unescape_code_start
+from .pep8 import pep8_lines_to_end_of_cell_marker, pep8_lines_between_cells
 
 _BLANK_LINE = re.compile(r"^\s*$")
 _PY_COMMENT = re.compile(r"^\s*#")
@@ -95,6 +96,7 @@ def __init__(self, fmt=None):
         self.comment_magics = fmt['comment_magics'] if 'comment_magics' in fmt else self.default_comment_magics
         self.metadata = None
         self.content = []
+        self.explicit_eoc = None
         self.cell_type = None
         self.language = None
 
@@ -122,7 +124,11 @@ def read(self, lines):
         if not self.metadata:
             self.metadata = {}
 
-        if self.lines_to_next_cell != 1:
+        org_lines = self.content
+        if self.ext == '.py' and self.cell_type != 'code' and self.content:
+            org_lines = ['#']  # cell was originally commented
+        if self.lines_to_next_cell != 1 if (self.explicit_eoc and self.cell_type == 'code' and self.ext == '.py') \
+                else self.lines_to_next_cell != pep8_lines_between_cells(org_lines, lines[pos_next_cell:], self.ext):
             self.metadata['lines_to_next_cell'] = self.lines_to_next_cell
 
         if self.language:
@@ -190,7 +196,7 @@ def find_cell_end(self, lines):
     def find_cell_content(self, lines):
         """Parse cell till its end and set content, lines_to_next_cell.
         Return the position of next cell start"""
-        cell_end_marker, next_cell_start, explicit_eoc = self.find_cell_end(lines)
+        cell_end_marker, next_cell_start, self.explicit_eoc = self.find_cell_end(lines)
 
         # Metadata to dict
         if self.metadata is None:
@@ -202,17 +208,23 @@ def find_cell_content(self, lines):
         # Cell content
         source = lines[cell_start:cell_end_marker]
 
+        # Exactly two empty lines at the end of cell (caused by PEP8)?
+        if self.ext == '.py' and self.explicit_eoc:
+            if last_two_lines_blank(source):
+                source = source[:-2]
+                lines_to_end_of_cell_marker = 2
+            else:
+                lines_to_end_of_cell_marker = 0
+
+            if lines_to_end_of_cell_marker != pep8_lines_to_end_of_cell_marker(source, self.ext):
+                self.metadata['lines_to_end_of_cell_marker'] = lines_to_end_of_cell_marker
+
         if not is_active(self.ext, self.metadata) or \
                 ('active' not in self.metadata and self.language and self.language != self.default_language):
             self.content = uncomment(source, self.comment if self.ext not in ['.r', '.R'] else '#')
         else:
             self.content = self.uncomment_code_and_magics(source)
 
-        # Exactly two empty lines at the end of cell (caused by PEP8)?
-        if self.ext == '.py' and explicit_eoc and last_two_lines_blank(source):
-            self.content = source[:-2]
-            self.metadata['lines_to_end_of_cell_marker'] = 2
-
         # Is this a raw cell?
         if ('active' in self.metadata and not is_active('ipynb', self.metadata)) or \
                 (self.ext == '.md' and self.cell_type == 'code' and self.language is None):
@@ -225,7 +237,7 @@ def find_cell_content(self, lines):
                 _BLANK_LINE.match(lines[next_cell_start]) and
                 not _BLANK_LINE.match(lines[next_cell_start + 1])):
             next_cell_start += 1
-        elif (explicit_eoc and next_cell_start + 2 < len(lines) and
+        elif (self.explicit_eoc and next_cell_start + 2 < len(lines) and
               _BLANK_LINE.match(lines[next_cell_start]) and
               _BLANK_LINE.match(lines[next_cell_start + 1]) and
               not _BLANK_LINE.match(lines[next_cell_start + 2])):
@@ -235,7 +247,7 @@ def find_cell_content(self, lines):
             cell_end_marker,
             next_cell_start,
             len(lines),
-            explicit_eoc)
+            self.explicit_eoc)
 
         return next_cell_start
 

diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py
@@ -9,6 +9,7 @@
 from .magics import comment_magic, escape_code_start
 from .cell_reader import LightScriptCellReader
 from .languages import _SCRIPT_EXTENSIONS
+from .pep8 import pep8_lines_to_end_of_cell_marker
 
 
 def cell_source(cell):
@@ -56,14 +57,10 @@ def __init__(self, cell, default_language, fmt=None):
             else self.default_comment_magics
 
         # how many blank lines before next cell
-        self.lines_to_next_cell = cell.metadata.get('lines_to_next_cell', 1)
-        self.lines_to_end_of_cell_marker = cell.metadata.get('lines_to_end_of_cell_marker', 0)
-
-        # for compatibility with v0.5.4 and lower (to be removed)
-        if 'skipline' in cell.metadata:
-            self.lines_to_next_cell += 1
-        if 'noskipline' in cell.metadata:
-            self.lines_to_next_cell -= 1
+        self.lines_to_next_cell = cell.metadata.get('lines_to_next_cell', None)
+        self.lines_to_end_of_cell_marker = (cell.metadata['lines_to_end_of_cell_marker']
+                                            if 'lines_to_end_of_cell_marker' in cell.metadata
+                                            else pep8_lines_to_end_of_cell_marker(self.source, self.ext))
 
         if cell.cell_type == 'raw' and 'active' not in self.metadata:
             self.metadata['active'] = ''

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -153,9 +153,9 @@ def read_metadata(text, ext):
     else:
         comment = _SCRIPT_EXTENSIONS.get(ext, {}).get('comment', '#')
 
-    metadata, _, _, _ = header_to_metadata_and_cell(lines, comment)
+    metadata, _, _, _ = header_to_metadata_and_cell(lines, comment, ext)
     if ext in ['.r', '.R'] and not metadata:
-        metadata, _, _, _ = header_to_metadata_and_cell(lines, "#'")
+        metadata, _, _, _ = header_to_metadata_and_cell(lines, "#'", ext)
 
     return metadata
 

diff --git a/jupytext/header.py b/jupytext/header.py
@@ -9,6 +9,7 @@
 from .version import __version__
 from .languages import _SCRIPT_EXTENSIONS, comment_lines
 from .metadata_filter import filter_metadata
+from .pep8 import pep8_lines_between_cells
 
 SafeRepresenter.add_representer(nbformat.NotebookNode, SafeRepresenter.represent_dict)
 
@@ -83,8 +84,8 @@ def metadata_and_cell_to_header(notebook, text_format, ext):
     """
 
     header = []
-    skipline = True
 
+    lines_to_next_cell = None
     if notebook.cells:
         cell = notebook.cells[0]
         if cell.cell_type == 'raw':
@@ -93,7 +94,7 @@ def metadata_and_cell_to_header(notebook, text_format, ext):
                     and _HEADER_RE.match(lines[0]) \
                     and _HEADER_RE.match(lines[-1]):
                 header = lines[1:-1]
-                skipline = not cell.metadata.get('noskipline', False)
+                lines_to_next_cell = cell.metadata.get('lines_to_next_cell')
                 notebook.cells = notebook.cells[1:]
 
     metadata = notebook.get('metadata', {})
@@ -118,14 +119,17 @@ def metadata_and_cell_to_header(notebook, text_format, ext):
         header = ['---'] + header + ['---']
 
     header = comment_lines(header, text_format.header_prefix)
+    if lines_to_next_cell is None and notebook.cells:
+        lines_to_next_cell = pep8_lines_between_cells(header, notebook.cells[0], ext)
+    else:
+        lines_to_next_cell = 0
 
-    if header and skipline:
-        header += ['']
+    header.extend([''] * lines_to_next_cell)
 
     return header
 
 
-def header_to_metadata_and_cell(lines, header_prefix):
+def header_to_metadata_and_cell(lines, header_prefix, ext=None):
     """
     Return the metadata, a boolean to indicate if a jupyter section was found,
      the first cell of notebook if some metadata is found outside of the jupyter section, and next loc in text
@@ -186,20 +190,20 @@ def header_to_metadata_and_cell(lines, header_prefix):
         if jupyter:
             metadata.update(yaml.load('\n'.join(jupyter))['jupyter'])
 
-        skipline = True
+        lines_to_next_cell = 1
         if len(lines) > i + 1:
             line = uncomment_line(lines[i + 1], header_prefix)
             if not _BLANK_RE.match(line):
-                skipline = False
+                lines_to_next_cell = 0
             else:
                 i = i + 1
         else:
-            skipline = False
+            lines_to_next_cell = 0
 
         if header:
             cell = new_raw_cell(source='\n'.join(['---'] + header + ['---']),
-                                metadata={} if skipline else
-                                {'lines_to_next_cell': 0})
+                                metadata={} if lines_to_next_cell == pep8_lines_between_cells(
+                                    ['---'], lines[i + 1:], ext) else {'lines_to_next_cell': lines_to_next_cell})
         else:
             cell = None
 

diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py
@@ -12,6 +12,7 @@
     encoding_and_executable, insert_or_test_version_number
 from .languages import default_language_from_metadata_and_ext, set_main_and_cell_language
 from .cell_metadata import _JUPYTEXT_CELL_METADATA
+from .pep8 import pep8_lines_between_cells
 
 
 class TextNotebookConverter(NotebookReader, NotebookWriter):
@@ -27,7 +28,9 @@ def reads(self, s, **_):
         lines = s.splitlines()
 
         cells = []
-        metadata, jupyter_md, header_cell, pos = header_to_metadata_and_cell(lines, self.implementation.header_prefix)
+        metadata, jupyter_md, header_cell, pos = header_to_metadata_and_cell(lines,
+                                                                             self.implementation.header_prefix,
+                                                                             self.implementation.extension)
         if 'comment_magics' in metadata.get('jupytext', {}):
             self.fmt['comment_magics'] = metadata['jupytext']['comment_magics']
 
@@ -98,16 +101,17 @@ def writes(self, nb, **kwargs):
         texts = [cell.cell_to_text() for cell in cell_exporters]
 
         for i, cell in enumerate(cell_exporters):
-            text = cell.simplify_code_markers(
-                texts[i], texts[i + 1] if i + 1 < len(texts) else None, lines)
+            next_text = texts[i + 1] if i + 1 < len(texts) else None
+            text = cell.simplify_code_markers(texts[i], next_text, lines)
 
             if i == 0 and self.implementation.format_name and \
                     self.implementation.format_name.startswith('sphinx') and \
                     (text in [['%matplotlib inline'], ['# %matplotlib inline']]):
                 continue
 
             lines.extend(text)
-            lines.extend([''] * cell.lines_to_next_cell)
+            lines.extend([''] * (cell.lines_to_next_cell if cell.lines_to_next_cell is not None else
+                                 pep8_lines_between_cells(text, next_text, self.implementation.extension)))
 
             # two blank lines between markdown cells in Rmd
             if self.ext in ['.Rmd', '.md'] and not cell.is_code():

diff --git a/jupytext/pep8.py b/jupytext/pep8.py
@@ -0,0 +1,53 @@
+"""Determine how many blank lines should be inserted between two cells"""
+
+
+def cell_starts_with_function_or_class(lines):
+    """Is the first non-commented line of the cell the start of a function or a class?
+
+    Leading lines that start with '#' are ignored. The first remaining line decides
+    the answer: it must start with 'def ', 'class ', 'async def ' or with a decorator
+    ('@'), since PEP 8 expects two blank lines before each of these at top level.
+
+    Returns False for an empty cell, for a cell made only of comments, and when the
+    first non-commented line is anything else (including a blank line)."""
+    for line in lines:
+        if line.startswith('#'):
+            continue
+        # A decorator is always followed by a function or class definition
+        return line.startswith(('def ', 'class ', 'async def ', '@'))
+
+    return False
+
+
+def cell_ends_with_function_or_class(lines):
+    """Does the last line of the cell belong to the body of a function or a class?
+
+    The cell must end with an indented, non-blank line. Starting from the bottom,
+    the lines are then scanned until the first one that is neither blank, nor
+    commented, nor indented: the answer is True when that line starts with 'def ',
+    'class ' or 'async def '.
+
+    Returns False for an empty cell, and when no such line is found."""
+    if not lines:
+        return False
+
+    last_line = lines[-1]
+    if not last_line.startswith(' ') or not last_line.strip():
+        return False
+
+    # find the first line, starting from the bottom, that is not indented
+    for line in reversed(lines):
+        # blank lines are legitimate inside the body of a function or class,
+        # so they must not interrupt the search
+        if not line.strip() or line.startswith(('#', ' ')):
+            continue
+        return line.startswith(('def ', 'class ', 'async def '))
+
+    return False
+
+
+def pep8_lines_to_end_of_cell_marker(lines, ext):
+    """Number of blank lines expected between the content of a cell and its explicit
+    end-of-cell marker: two when a '.py' cell ends with a function or class, else none"""
+    if ext != '.py':
+        return 0
+    if cell_ends_with_function_or_class(lines):
+        return 2
+    return 0
+
+
+def pep8_lines_between_cells(prev_lines, next_lines, ext):
+    """Number of blank lines to insert between two consecutive paragraphs so that
+    a Python script is pep8: one after the last paragraph, none when there is nothing
+    before, two when a '.py' paragraph ends with, or is followed by, a function or
+    class, and one in every other case"""
+    if not next_lines:
+        # nothing follows
+        return 1
+    if not prev_lines:
+        # nothing precedes
+        return 0
+
+    needs_two_lines = ext == '.py' and (
+        cell_ends_with_function_or_class(prev_lines) or
+        cell_starts_with_function_or_class(next_lines))
+    return 2 if needs_two_lines else 1
diff --git a/tests/notebooks/mirror/Rmd_to_ipynb/R_sample.ipynb b/tests/notebooks/mirror/Rmd_to_ipynb/R_sample.ipynb
@@ -42,9 +42,11 @@
  ],
  "metadata": {
   "jupytext": {
-   "main_language": "R"
+   "cell_metadata_filter": "language,-all",
+   "main_language": "R",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/Rmd_to_ipynb/chunk_options.ipynb b/tests/notebooks/mirror/Rmd_to_ipynb/chunk_options.ipynb
@@ -67,4 +67,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/Rmd_to_ipynb/ioslides.ipynb b/tests/notebooks/mirror/Rmd_to_ipynb/ioslides.ipynb
@@ -88,9 +88,11 @@
  ],
  "metadata": {
   "jupytext": {
-   "main_language": "python"
+   "cell_metadata_filter": "fig.height,language,hide_input,fig.width,-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/Rmd_to_ipynb/knitr-spin.ipynb b/tests/notebooks/mirror/Rmd_to_ipynb/knitr-spin.ipynb
@@ -173,9 +173,11 @@
  ],
  "metadata": {
   "jupytext": {
-   "main_language": "R"
+   "cell_metadata_filter": "hide_output,fig.height,name,language,fig.width,cache,-all",
+   "main_language": "R",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/Rmd_to_ipynb/markdown.ipynb b/tests/notebooks/mirror/Rmd_to_ipynb/markdown.ipynb
@@ -35,9 +35,11 @@
  ],
  "metadata": {
   "jupytext": {
-   "main_language": "python"
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/script_to_ipynb/hydrogen.ipynb b/tests/notebooks/mirror/script_to_ipynb/hydrogen.ipynb
@@ -39,9 +39,11 @@
  ],
  "metadata": {
   "jupytext": {
-   "main_language": "python"
+   "cell_metadata_filter": "tags,title,-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/tests/notebooks/mirror/script_to_ipynb/julia_sample_script.ipynb b/tests/notebooks/mirror/script_to_ipynb/julia_sample_script.ipynb
@@ -78,10 +78,12 @@
  ],
  "metadata": {
   "jupytext": {
+   "cell_metadata_filter": "-all",
    "encoding": "# -*- coding: utf-8 -*-",
-   "main_language": "julia"
+   "main_language": "julia",
+   "notebook_metadata_filter": "-all"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -67,4 +67,4 @@ @@
      },
      "nbformat": 4,
      "nbformat_minor": 2
-    }
+    }