From 08bfc679ff51b319f4241f02069b44495153fd7a Mon Sep 17 00:00:00 2001
From: JasonJoosteCSIRO <120607685+JasonJoosteCSIRO@users.noreply.github.com>
Date: Sat, 6 May 2023 05:23:32 +1000
Subject: [PATCH 1/3] Sequential cell ids (#184)

* Add command line argument --keep-id, which maintains the randomly generated cell ids. Otherwise cell ids are assigned incrementally (after the removal of cells), which keeps them consistent across runs in version control

* Modify test_cell and test_exception in test_keep_output_tags.py to use the new strip_output signature

* Fix failed test_end_to_end_nbstripout with test_max_size by passing --keep-id for keeping the existing ids

* Add tests for notebooks with and without the --keep-id flag. A new extension expected_id was added for expected output with ordered ids

* Modify the readme to include the --keep-id flag

* Add keyword arguments for None inputs in test_keep_output_tags.py

* Rename expected output files to make desired sequential ids more explicit

Co-authored-by: Florian Rathgeber <florian.rathgeber@gmail.com>
---
 README.rst                                    |  4 +
 nbstripout/_nbstripout.py                     |  8 +-
 nbstripout/_utils.py                          |  8 +-
 ...test_max_size.ipynb.expected_sequential_id | 90 ++++++++++++++++++
 tests/e2e_notebooks/test_nbformat45.ipynb     | 93 +++++++++++++++++++
 .../test_nbformat45.ipynb.expected            | 61 ++++++++++++
 ...st_nbformat45.ipynb.expected_sequential_id | 61 ++++++++++++
 tests/test_end_to_end.py                      |  5 +-
 tests/test_keep_output_tags.py                |  4 +-
 9 files changed, 325 insertions(+), 9 deletions(-)
 create mode 100644 tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id
 create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb
 create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb.expected
 create mode 100644 tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id

diff --git a/README.rst b/README.rst
index a161c65..3d97290 100644
--- a/README.rst
+++ b/README.rst
@@ -274,6 +274,10 @@ Do not strip the output ::
 
     nbstripout --keep-output
 
+Do not reassign the cell ids to be sequential ::
+
+    nbstripout --keep-id
+
 To mark special cells so that the output is not stripped, you can either:
 
 1.  Set the ``keep_output`` tag on the cell. To do this, enable the tags
diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py
index 3957395..50f096a 100644
--- a/nbstripout/_nbstripout.py
+++ b/nbstripout/_nbstripout.py
@@ -373,6 +373,9 @@ def main():
                         help='Do not strip the execution count/prompt number')
     parser.add_argument('--keep-output', action='store_true',
                         help='Do not strip output', default=None)
+    parser.add_argument('--keep-id', action='store_true',
+                        help='Keep the randomly generated cell ids, '
+                        'which will be different after each execution.')
     parser.add_argument('--extra-keys', default='',
                         help='Space separated list of extra keys to strip '
                         'from metadata, e.g. metadata.foo cell.metadata.bar')
@@ -409,7 +412,6 @@ def main():
 
     parser.add_argument('files', nargs='*', help='Files to strip output from')
     args = parser.parse_args()
-
     git_config = ['git', 'config']
 
     if args._system:
@@ -487,7 +489,7 @@ def main():
                     warnings.simplefilter("ignore", category=UserWarning)
                     nb = read(f, as_version=NO_CONVERT)
 
-            nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells,
+            nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
 
             if args.dry_run:
@@ -533,7 +535,7 @@ def main():
                 warnings.simplefilter("ignore", category=UserWarning)
                 nb = read(input_stream, as_version=NO_CONVERT)
 
-            nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys, args.drop_empty_cells,
+            nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
 
             if args.dry_run:
diff --git a/nbstripout/_utils.py b/nbstripout/_utils.py
index d54ac91..322edbd 100644
--- a/nbstripout/_utils.py
+++ b/nbstripout/_utils.py
@@ -94,7 +94,7 @@ def strip_zeppelin_output(nb):
     return nb
 
 
-def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[],
+def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[],
                  strip_init_cells=False, max_size=0):
     """
     Strip the outputs, execution count/prompt number and miscellaneous
@@ -124,7 +124,7 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa
     for tag_to_drop in drop_tagged_cells:
         conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", []))
 
-    for cell in _cells(nb, conditionals):
+    for i, cell in enumerate(_cells(nb, conditionals)):
         keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells)
 
         # Remove the outputs, unless directed otherwise
@@ -148,7 +148,9 @@ def strip_output(nb, keep_output, keep_count, extra_keys=[], drop_empty_cells=Fa
             cell['prompt_number'] = None
         if 'execution_count' in cell and not keep_count:
             cell['execution_count'] = None
-
+        # Replace the cell id with an incremental value that will be consistent across runs
+        if 'id' in cell and not keep_id:
+            cell['id'] = str(i)
         for field in keys['cell']:
             pop_recursive(cell, field)
     return nb
diff --git a/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id b/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id
new file mode 100644
index 0000000..44cddfb
--- /dev/null
+++ b/tests/e2e_notebooks/test_max_size.ipynb.expected_sequential_id
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0",
+   "metadata": {},
+   "source": [
+    "This notebook tests that outputs can be cleared based on size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "aaaaaaaaaa\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"a\"*10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"a\"*100)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.4"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb b/tests/e2e_notebooks/test_nbformat45.ipynb
new file mode 100644
index 0000000..f6d3f16
--- /dev/null
+++ b/tests/e2e_notebooks/test_nbformat45.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "5c42035d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'This is the new Jupyter notebook'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"This is the new Jupyter notebook\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "886205fa",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'text2'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"text2\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a183d4e9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "f(3) =  4\n"
+     ]
+    }
+   ],
+   "source": [
+    "def f(x):\n",
+    "    \"\"\"My function\n",
+    "    x : parameter\"\"\"\n",
+    "    \n",
+    "    return x+1\n",
+    "\n",
+    "print(\"f(3) = \", f(3))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb.expected b/tests/e2e_notebooks/test_nbformat45.ipynb.expected
new file mode 100644
index 0000000..fb78c3f
--- /dev/null
+++ b/tests/e2e_notebooks/test_nbformat45.ipynb.expected
@@ -0,0 +1,61 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c42035d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"This is the new Jupyter notebook\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "886205fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"text2\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a183d4e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f(x):\n",
+    "    \"\"\"My function\n",
+    "    x : parameter\"\"\"\n",
+    "    \n",
+    "    return x+1\n",
+    "\n",
+    "print(\"f(3) = \", f(3))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id b/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id
new file mode 100644
index 0000000..8c499da
--- /dev/null
+++ b/tests/e2e_notebooks/test_nbformat45.ipynb.expected_sequential_id
@@ -0,0 +1,61 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"This is the new Jupyter notebook\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"text2\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f(x):\n",
+    "    \"\"\"My function\n",
+    "    x : parameter\"\"\"\n",
+    "    \n",
+    "    return x+1\n",
+    "\n",
+    "print(\"f(3) = \", f(3))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py
index 0b96923..575dd9b 100644
--- a/tests/test_end_to_end.py
+++ b/tests/test_end_to_end.py
@@ -15,7 +15,8 @@
     ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells_dontdrop.ipynb.expected", []),
     ("test_drop_tagged_cells.ipynb", "test_drop_tagged_cells.ipynb.expected", ['--drop-tagged-cells=test']),
     ("test_execution_timing.ipynb", "test_execution_timing.ipynb.expected", []),
-    ("test_max_size.ipynb", "test_max_size.ipynb.expected", ["--max-size", "50"]),
+    ("test_max_size.ipynb", "test_max_size.ipynb.expected", ["--max-size", "50", "--keep-id"]),
+    ("test_max_size.ipynb", "test_max_size.ipynb.expected_sequential_id", ["--max-size", "50"]),
     ("test_metadata.ipynb", "test_metadata.ipynb.expected", []),
     ("test_metadata.ipynb", "test_metadata_extra_keys.ipynb.expected", ["--extra-keys", "metadata.kernelspec metadata.language_info"]),
     ("test_metadata.ipynb", "test_metadata_keep_count.ipynb.expected", ["--keep-count"]),
@@ -26,6 +27,8 @@
     ("test_metadata_period.ipynb", "test_metadata_period.ipynb.expected", ["--extra-keys", "cell.metadata.application/vnd.databricks.v1+cell metadata.application/vnd.databricks.v1+notebook"]),
     ("test_strip_init_cells.ipynb", "test_strip_init_cells.ipynb.expected", ["--strip-init-cells"]),
     ("test_nbformat2.ipynb", "test_nbformat2.ipynb.expected", []),
+    ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected", ["--keep-id"]),
+    ("test_nbformat45.ipynb", "test_nbformat45.ipynb.expected_sequential_id", []),
     ("test_unicode.ipynb", "test_unicode.ipynb.expected", []),
     ("test_widgets.ipynb", "test_widgets.ipynb.expected", []),
     ("test_zeppelin.zpln", "test_zeppelin.zpln.expected", ["--mode", "zeppelin"]),
diff --git a/tests/test_keep_output_tags.py b/tests/test_keep_output_tags.py
index 7ec8567..4243a82 100644
--- a/tests/test_keep_output_tags.py
+++ b/tests/test_keep_output_tags.py
@@ -24,7 +24,7 @@ def nb_with_exception():
 
 def test_cells(orig_nb):
     nb_stripped = deepcopy(orig_nb)
-    nb_stripped = strip_output(nb_stripped, None, None)
+    nb_stripped = strip_output(nb_stripped, keep_output=None, keep_count=None, keep_id=None)
     for i, cell in enumerate(nb_stripped.cells):
         if cell.cell_type == 'code' and cell.source:
             match = re.match(r"\s*#\s*(output|no_output)", cell.source)
@@ -41,4 +41,4 @@ def test_cells(orig_nb):
 
 def test_exception(nb_with_exception):
     with pytest.raises(MetadataError):
-        strip_output(nb_with_exception, None, None)
+        strip_output(nb_with_exception, keep_output=None, keep_count=None, keep_id=None)

From 749f431366811f26114c15aefcb5149e014b9e97 Mon Sep 17 00:00:00 2001
From: wpbonelli <wesbonelli@gmail.com>
Date: Sun, 22 Oct 2023 02:40:32 -0400
Subject: [PATCH 2/3] Add python3.11 classifier to setup.py, remove python3.6
 (#186)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index dede8c5..5982ec0 100644
--- a/setup.py
+++ b/setup.py
@@ -37,10 +37,10 @@
           "Intended Audience :: Developers",
           "Programming Language :: Python",
           "Programming Language :: Python :: 3",
-          "Programming Language :: Python :: 3.6",
           "Programming Language :: Python :: 3.7",
           "Programming Language :: Python :: 3.8",
           "Programming Language :: Python :: 3.9",
           "Programming Language :: Python :: 3.10",
+          "Programming Language :: Python :: 3.11",
           "Topic :: Software Development :: Version Control",
       ])

From 6f6303a0a6e8c3fda570612731ef20c265358379 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez" <jspaezp@gmail.com>
Date: Tue, 13 Feb 2024 16:30:15 -0800
Subject: [PATCH 3/3] (wip) Add --verify flag that errors if a notebook would be stripped

---
 nbstripout/_nbstripout.py | 55 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py
index 50f096a..6f4c748 100644
--- a/nbstripout/_nbstripout.py
+++ b/nbstripout/_nbstripout.py
@@ -354,6 +354,9 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
 def main():
     parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
     task = parser.add_mutually_exclusive_group()
+    task.add_argument('--verify', action='store_true',
+                      help='Print which notebooks would have been stripped, '
+                      'Like dry-run but returns an error if a file would have been changed')
     task.add_argument('--dry-run', action='store_true',
                       help='Print which notebooks would have been stripped')
     task.add_argument('--install', action='store_true',
@@ -469,6 +472,7 @@ def main():
     input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None
     output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')
 
+    to_change_files = []
     for filename in args.files:
         if not (args.force or filename.endswith('.ipynb') or filename.endswith('.zpln')):
             continue
@@ -480,10 +484,17 @@ def main():
                         output_stream.write(f'Dry run: would have stripped {filename}\n')
                         continue
                     nb = json.load(f, object_pairs_hook=collections.OrderedDict)
+                    pre_hash = hash(json.dumps(nb))
                     nb_stripped = strip_zeppelin_output(nb)
 
-                    with open(filename, 'w') as f:
-                        json.dump(nb_stripped, f, indent=2)
+                    if not args.verify:
+                        with open(filename, 'w') as f:
+                            json.dump(nb_stripped, f, indent=2)
+                    else:
+                        post_hash = hash(json.dumps(nb_stripped))
+
+                        if pre_hash != post_hash:
+                            to_change_files.append(filename)
                     continue
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore", category=UserWarning)
@@ -491,10 +502,10 @@ def main():
 
             nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
+            post_hash = hash(json.dumps(nb))
 
             if args.dry_run:
                 output_stream.write(f'Dry run: would have stripped {filename}\n')
-
                 continue
 
             if args.textconv:
@@ -504,10 +515,15 @@ def main():
 
                 output_stream.flush()
             else:
-                with io.open(filename, 'w', encoding='utf8', newline='') as f:
-                    with warnings.catch_warnings():
-                        warnings.simplefilter("ignore", category=UserWarning)
-                        write(nb, f)
+                if args.verify:
+                    if pre_hash != post_hash:
+                        output_stream.write(f'Verify: would have stripped {filename}\n')
+                        to_change_files.append(filename)
+                else:
+                    with io.open(filename, 'w', encoding='utf8', newline='') as f:
+                        with warnings.catch_warnings():
+                            warnings.simplefilter("ignore", category=UserWarning)
+                            write(nb, f)
         except NotJSONError:
             print(f"'{filename}' is not a valid notebook", file=sys.stderr)
             raise SystemExit(1)
@@ -519,28 +535,53 @@ def main():
             print(f"Could not strip '{filename}'", file=sys.stderr)
             raise
 
+    if to_change_files:
+        raise SystemExit(1)
+
     if not args.files and input_stream:
         try:
             if args.mode == 'zeppelin':
                 if args.dry_run:
                     output_stream.write('Dry run: would have stripped input from stdin\n')
                     raise SystemExit(0)
+
                 nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict)
                 nb_stripped = strip_zeppelin_output(nb)
+
+                if args.verify:
+                    pre_hash = hash(json.dumps(nb))
+                    post_hash = hash(json.dumps(nb_stripped))
+                    if pre_hash != post_hash:
+                        output_stream.write('Verify: would have stripped input from stdin\n')
+                        raise SystemExit(1)
+                    else:
+                        raise SystemExit(0)
+                    
                 json.dump(nb_stripped, output_stream, indent=2)
                 output_stream.write('\n')
                 output_stream.flush()
                 raise SystemExit(0)
+
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=UserWarning)
                 nb = read(input_stream, as_version=NO_CONVERT)
+                pre_hash = hash(json.dumps(nb))
 
             nb = strip_output(nb, args.keep_output, args.keep_count, args.keep_id, extra_keys, args.drop_empty_cells,
                               args.drop_tagged_cells.split(), args.strip_init_cells, _parse_size(args.max_size))
+            post_hash = hash(json.dumps(nb))
 
             if args.dry_run:
                 output_stream.write('Dry run: would have stripped input from '
                                     'stdin\n')
+            elif args.verify:
+                if pre_hash != post_hash:
+                    output_stream.write(
+                        'Verify: would have stripped input from stdin\n'
+                    )
+                    raise SystemExit(1)
+
+                output_stream.flush()
             else:
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore", category=UserWarning)