From 41b7e9c93b50da1d6feb486aceb2c4d534374090 Mon Sep 17 00:00:00 2001 From: John Paton Date: Thu, 21 Jan 2021 06:36:45 +0100 Subject: [PATCH] Enable integration with pre-commit (#698) * Add .pre-commit-hooks.yaml * Use files instead of types * Rename main hook to jupytext * Remove extra hooks * Add default --from * Update docs * Add --add-untracked for pre-commit compatibility * Flip boolean * Update docs * Exit nonzero if a new file is added to the index with --add-untracked A pre-commit hook should fail if anything in the index is modified. By adding the new untracked file ourselves, we circumvent pre-commit, which means it won't have the chance to run other configured hooks against the new file. Exiting non-zero in this case ensures that the commit will fail, forcing the user to commit again. This time, the new file will be in the index from the start, so other hooks will run against it. * Remove accidental changes * Remove accidental changes * Clarify help * Strip for safety * Add tests for --add-untracked * Fix pre-commit hooks * Fix tests for windows * Add tests for pre-commit integration * Run pre-commit tests in ci * Retry running pre-commit tests in ci * Refactor to better align with pre-commit * Update tests for precommit * Update hook description Co-authored-by: Aaron Gokaslan * Log output files that are untracked * Update examples * Fixed examples pre-commit documentation * Fix trailing whitespace * Ensure hooks runs in serial Prevents race condition during sync * Fix typo * Clarify log message * Convert test to --sync * args should be array * Ignore unmatched inputs also when using --sync * add notebook types following https://github.com/pre-commit/identify/blob/master/identify/extensions.py * Test & fix 'alert_untracked' in the --sync mode * Add .pre-commit-hooks.yaml * Use files instead of types * Rename main hook to jupytext * Remove extra hooks * Add default --from * Update docs * Add --add-untracked for pre-commit compatibility * Flip boolean * Update docs * Exit nonzero if a new file is added to the index with --add-untracked A pre-commit hook should fail if anything in the index is modified. By adding the new untracked file ourselves, we circumvent pre-commit, which means it won't have the chance to run other configured hooks against the new file. Exiting non-zero in this case ensures that the commit will fail, forcing the user to commit again. This time, the new file will be in the index from the start, so other hooks will run against it. * Remove accidental changes * Remove accidental changes * Clarify help * Strip for safety * Add tests for --add-untracked * Fix pre-commit hooks * Fix tests for windows * Add tests for pre-commit integration * Run pre-commit tests in ci * Retry running pre-commit tests in ci * Refactor to better align with pre-commit * Update tests for precommit * Update hook description Co-authored-by: Aaron Gokaslan * Log output files that are untracked * Update examples * Fixed examples pre-commit documentation * Fix trailing whitespace * Ensure hooks runs in serial Prevents race condition during sync * Fix typo * Clarify log message * Convert test to --sync * args should be array * Remove types * Try calling pre_comit directly * When in doubt, return False * Fix exisiting file test * Only update the timestamp (not the content) of the text input file * Only run on notebooks * use arrays * Only run on notebooks * Handle pre-commit not being installed Relevant tests will be skipped in that case * Handle right exception Co-authored-by: Aaron Gokaslan Co-authored-by: Aaron Gokaslan Co-authored-by: Marc Wouts --- .pre-commit-hooks.yaml | 8 ++ docs/CHANGELOG.md | 7 +- docs/using-pre-commit.md | 52 ++++---- jupytext/cli.py | 86 +++++++++++-- requirements-dev.txt | 1 + tests/test_metadata_filter.py | 5 +- tests/test_pre_commit.py | 219 +++++++++++++++++++++++++++++++++- tests/utils.py | 3 + 8 files changed, 334 insertions(+), 47 deletions(-) create mode 100644 .pre-commit-hooks.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 000000000..6faaadc4f --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,8 @@ +- id: jupytext + name: jupytext + description: Runs jupytext on all notebooks and paired files. + language: python + entry: jupytext --ignore-unmatched --alert-untracked + pass_filenames: true + require_serial: true + types: [jupyter] diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f61df207a..3be525959 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -5,13 +5,14 @@ Jupytext ChangeLog --------------- **Changed** -- Jupytext does not work properly with the new cell ids of the version 4.5 of `nbformat>=5.1.0` yet, so we added the requirement `nbformat<=5.0.8` (#715) +- Jupytext does not work properly with the new cell ids of the version 4.5 of `nbformat>=5.1.0` yet, so we added the requirement `nbformat<=5.0.8` ([#715](https://github.com/mwouts/jupytext/issues/715)) +- `jupytext --sync` only updates the timestamp of the text file (not the file itself) when that file is the most recent ([#698](https://github.com/mwouts/jupytext/issues/698)) **Fixed** -- Indented magic commands are supported (#694) +- Indented magic commands are supported ([#694](https://github.com/mwouts/jupytext/issues/694)) **Added** -- Added a test that ensures that `py:percent` scripts end with exactly one blank line (#682) +- Added a test that ensures that `py:percent` scripts end with exactly one blank line ([#682](https://github.com/mwouts/jupytext/issues/682)) 1.9.1 (2021-01-06) diff --git a/docs/using-pre-commit.md b/docs/using-pre-commit.md index 402d67cce..29c1907df 100644 --- a/docs/using-pre-commit.md +++ b/docs/using-pre-commit.md @@ -27,35 +27,43 @@ Note that these hooks do not update the `.ipynb` notebook when you pull. Make su ## Using Jupytext with the pre-commit package manager -Using Jupytext with the [pre-commit package manager](https://pre-commit.com/) is another option. You could add the following to your `.pre-commit-config.yaml` file: -``` +Using Jupytext with the [pre-commit package manager](https://pre-commit.com/) is another option. You could add the following to your `.pre-commit-config.yaml` file to sync all staged notebooks: + +```yaml repos: -- repo: local +- repo: https://github.com/mwouts/jupytext + rev: #CURRENT_TAG/COMMIT_HASH hooks: - id: jupytext - name: jupytext - entry: jupytext --to md - files: .ipynb - language: python + args: [--sync] ``` -Here is another `.pre-commit-config.yaml` example that uses the --pre-commit mode of Jupytext to convert all `.ipynb` notebooks to `py:light` representation and unstage the `.ipynb` files before committing. +You can provide almost all command line arguments to Jupytext in pre-commit, for example to produce several kinds of output files: + +```yaml +repos: +- repo: https://github.com/mwouts/jupytext + rev: #CURRENT_TAG/COMMIT_HASH + hooks: + - id: jupytext + args: [--from, ipynb, --to, py:light, --to, markdown] ``` + +If you are combining Jupytext with other pre-commit hooks, you must ensure that all hooks will pass on any files you generate. For example, if you have a hook for using `black` to format all your python code, then you should use Jupytext's `--pipe` option to also format newly generated Python scripts before writing them: + +```yaml repos: - - - repo: local +- repo: https://github.com/mwouts/jupytext + rev: #CURRENT_TAG/COMMIT_HASH hooks: - - - id: jupytext - name: jupytext - entry: jupytext --from ipynb --to py:light --pre-commit - pass_filenames: false - language: python - - - id: unstage-ipynb - name: unstage-ipynb - entry: git reset HEAD **/*.ipynb - pass_filenames: false - language: system + - id: jupytext + args: [--sync, --pipe, black] + additional_dependencies: + - black==19.10b0 # Matches hook +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black + language_version: python3 ``` diff --git a/jupytext/cli.py b/jupytext/cli.py index 4c2db4489..72424dd6d 100644 --- a/jupytext/cli.py +++ b/jupytext/cli.py @@ -87,6 +87,18 @@ def parse_jupytext_args(args=None): "on the notebooks found in the git index, which have an " "extension that matches the (optional) --from argument.", ) + parser.add_argument( + "--ignore-unmatched", + action="store_true", + help="Ignore passed filepaths that aren't in the source format " + "you are trying to convert from.", + ) + parser.add_argument( + "--alert-untracked", + action="store_true", + help="Exit with a non-zero status if the output files are not " + "tracked in the git index", + ) parser.add_argument( "--from", dest="input_format", @@ -417,15 +429,29 @@ def jupytext_single_file(nb_file, args, log): msg += " Maybe you mean 'jupytext --sync {}' ?".format(args.set_formats) raise ValueError(msg) - nb_dest = args.output or ( - None - if not args.output_format - else ( - "-" - if nb_file == "-" - else full_path(base_path(nb_file, args.input_format), args.output_format) - ) - ) + nb_dest = None + if args.output: + nb_dest = args.output + elif args.output_format and nb_file == "-": + nb_dest = "-" + else: + try: + bp = base_path(nb_file, args.input_format) + except InconsistentPath: + if args.ignore_unmatched: + log( + "[jupytext] Ignoring unmatched input path {}{}".format( + nb_file, + " for format {}".format(args.input_format) + if args.input_format + else "", + ) + ) + return 0 + else: + raise + if args.output_format: + nb_dest = full_path(bp, args.output_format) config = load_jupytext_config(os.path.abspath(nb_file)) @@ -688,18 +714,37 @@ def jupytext_single_file(nb_file, args, log): if modified: inputs_nb_file = outputs_nb_file = None + untracked_paths = [] + def write_function(path, fmt): - # Do not write the ipynb file if it was not modified - # But, always write text representations to make sure they are the most recent - if path == inputs_nb_file and path == outputs_nb_file: + if path == inputs_nb_file: + # We update the timestamp of the text file to make sure it remains more recent than the ipynb + if path != outputs_nb_file: + log("[jupytext] Sync timestamp of '{}'".format(nb_file)) + os.utime(nb_file, None) + + # We don't write the ipynb file if it was not modified return + log("[jupytext] Updating '{}'".format(path)) write(notebook, path, fmt=fmt) if args.pre_commit: system("git", "add", path) + if args.alert_untracked and is_untracked(path): + nonlocal untracked_paths + untracked_paths.append(path) formats = prepare_notebook_for_save(notebook, config, nb_file) write_pair(nb_file, formats, write_function) + if untracked_paths: + log( + "[jupytext] Output file {nb_dest} is not tracked in the git index, " + "add it to the index using 'git add' to fix this.".format( + nb_dest=", ".join(untracked_paths) + ) + ) + return 1 + elif ( os.path.isfile(nb_file) and nb_dest.endswith(".ipynb") @@ -711,7 +756,14 @@ def write_function(path, fmt): log("[jupytext] Sync timestamp of '{}'".format(nb_file)) os.utime(nb_file, None) - return 0 + if args.alert_untracked and is_untracked(nb_dest): + log( + "[jupytext] Output file {nb_dest} is not tracked in the git index, " + "add it to the index using 'git add' to fix this.".format(nb_dest=nb_dest) + ) + return 1 + else: + return 0 def notebooks_in_git_index(fmt): @@ -731,6 +783,14 @@ def notebooks_in_git_index(fmt): return files +def is_untracked(filepath): + """Check whether a file is untracked by the git index""" + if not filepath: + return False + output = system("git", "ls-files", filepath).strip() + return output == "" + + def print_paired_paths(nb_file, fmt): """Display the paired paths for this notebook""" notebook = read(nb_file, fmt=fmt) diff --git a/requirements-dev.txt b/requirements-dev.txt index b5bf1992a..7b6dd077c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,6 +10,7 @@ setuptools toml jupyterlab==3.0.0 jupyter-packaging +pre-commit # Python 2 pathlib diff --git a/tests/test_metadata_filter.py b/tests/test_metadata_filter.py index 377174e8a..6aababecd 100644 --- a/tests/test_metadata_filter.py +++ b/tests/test_metadata_filter.py @@ -156,12 +156,11 @@ def test_default_config_has_priority_over_current_metadata( cfg_file = tmpdir.join("jupytext.toml") cfg_file.write( - """default_jupytext_formats = "ipynb,py:percent" -default_cell_metadata_filter = "-some_metadata_key" + """default_cell_metadata_filter = "-some_metadata_key" """ ) - jupytext_cli([str(py_file), "--sync"]) + jupytext_cli([str(py_file), "--to", "py"]) assert ( py_file.read() == """# %% diff --git a/tests/test_pre_commit.py b/tests/test_pre_commit.py index a73cc6a3c..c607eadd0 100644 --- a/tests/test_pre_commit.py +++ b/tests/test_pre_commit.py @@ -2,18 +2,25 @@ import os import stat +from pathlib import Path +from textwrap import dedent import unittest.mock as mock import pytest from jupytext.compare import compare from nbformat.v4.nbbase import new_notebook, new_markdown_cell, new_code_cell from jupytext import read, write -from jupytext.cli import jupytext, system +from jupytext.cli import jupytext, system, is_untracked from jupytext.compare import compare_notebooks from .utils import list_notebooks -from .utils import requires_black, requires_flake8, requires_pandoc +from .utils import requires_black, requires_flake8, requires_pandoc, requires_pre_commit from .utils import requires_jupytext_installed +try: + from pre_commit.main import main as pre_commit +except (ImportError, ModuleNotFoundError): + pre_commit = None + def git_in_tmpdir(tmpdir): """Return a function that will execute git instruction in the desired directory""" @@ -31,6 +38,15 @@ def git(*args): return git +def system_in_tmpdir(tmpdir): + """Return a function that will execute system commands in the desired directory""" + + def system_(*args): + return system(*args, cwd=str(tmpdir)) + + return system_ + + @requires_jupytext_installed def test_pre_commit_hook(tmpdir): tmp_ipynb = str(tmpdir.join("nb with spaces.ipynb")) @@ -252,13 +268,10 @@ def test_manual_call_of_pre_commit_hook(tmpdir): nb = new_notebook(cells=[]) os.chdir(str(tmpdir)) - def system_in_tmpdir(*args): - return system(*args, cwd=str(tmpdir)) - git = git_in_tmpdir(tmpdir) def hook(): - with mock.patch("jupytext.cli.system", system_in_tmpdir): + with mock.patch("jupytext.cli.system", system_in_tmpdir(tmpdir)): jupytext(["--to", "py", "--pre-commit"]) write(nb, tmp_ipynb) @@ -364,3 +377,197 @@ def test_wrap_markdown_cell(tmpdir): text = nb.cells[0].source assert len(text.splitlines()) >= 2 assert text != long_text + + +def test_is_untracked(tmpdir): + git = git_in_tmpdir(tmpdir) + + # make a test file + file = tmpdir.join("test.txt") + file.write("test file\n") + file = str(file) + + with tmpdir.as_cwd(): + # untracked + assert is_untracked(file) + + # added, not committed + git("add", file) + assert not is_untracked(file) + + # committed + git("commit", "-m", "'test'") + assert not is_untracked(file) + + +def test_ignore_unmatched_ignores(tmpdir): + # Unmatched file + file = tmpdir.join("test.txt") + file.write("Hello\n") + + # Run jupytext + status = jupytext( + ["--from", "ipynb", "--to", "py:light", "--ignore-unmatched", str(file)] + ) + + assert status == 0 + assert not tmpdir.join("test.py").exists() + + +def test_alert_untracked_alerts(tmpdir): + git_in_tmpdir(tmpdir) + + file = tmpdir.join("test.py") + file.write("print('hello')\n") + + # Run jupytext + with tmpdir.as_cwd(): + status = jupytext( + ["--from", ".py", "--to", "ipynb", "--alert-untracked", str(file)] + ) + + assert status != 0 + assert tmpdir.join("test.ipynb").exists() + + +def test_alert_untracked_alerts_when_using_sync(tmpdir): + git_in_tmpdir(tmpdir) + + file = tmpdir.join("test.py") + file.write("print('hello')\n") + + tmpdir.join(".jupytext.toml").write('default_jupytext_formats = "ipynb,py"') + + # Run jupytext + with tmpdir.as_cwd(): + status = jupytext(["--sync", "--alert-untracked", str(file)]) + + assert status != 0 + assert tmpdir.join("test.ipynb").exists() + + +def test_alert_untracked_not_alerts_for_tracked(tmpdir): + git = git_in_tmpdir(tmpdir) + + # write test notebook + nb = new_notebook(cells=[new_markdown_cell("A short notebook")]) + nb_file = str(tmpdir.join("test.ipynb")) + write(nb, nb_file) + + # write existing output + tmpdir.join("test.py").write("# Hello") + + # track output file + git("add", str("test.py")) + + # Run jupytext + with tmpdir.as_cwd(): + status = jupytext( + ["--from", "ipynb", "--to", "py:light", "--alert-untracked", str(nb_file)] + ) + + assert status == 0 + + +@requires_pre_commit +def test_pre_commit_hook_for_new_file(tmpdir): + # get the path and revision of this repo, to use with pre-commit + repo_root = str(Path(__file__).parent.parent.resolve()) + repo_rev = system("git", "rev-parse", "HEAD", cwd=repo_root).strip() + + # set up the tmpdir repo with pre-commit + git = git_in_tmpdir(tmpdir) + + pre_commit_config_yaml = dedent( + f""" + repos: + - repo: {repo_root} + rev: {repo_rev} + hooks: + - id: jupytext + args: [--sync] + """ + ) + tmpdir.join(".pre-commit-config.yaml").write(pre_commit_config_yaml) + git("add", ".pre-commit-config.yaml") + with tmpdir.as_cwd(): + pre_commit(["install", "--install-hooks", "-f"]) + + # write test notebook and sync it to py:percent + nb = new_notebook(cells=[new_markdown_cell("A short notebook")]) + nb_file = str(tmpdir.join("test.ipynb")) + py_file = str(tmpdir.join("test.py")) + write(nb, nb_file) + + with tmpdir.as_cwd(): + jupytext(["--set-formats", "ipynb,py:percent", nb_file]) + + # try to commit it, should fail since the hook runs and makes changes + git("add", nb_file) + with pytest.raises(SystemExit): + git("commit", "-m", "failing") + + # try again, it will still fail because the output hasn't been added + with pytest.raises(SystemExit): + git("commit", "-m", "still failing") + + # add the new file, now the commit will succeed + git("add", py_file) + git("commit", "-m", "passing") + assert "test.ipynb" in git("ls-files") + assert "test.py" in git("ls-files") + + +@requires_pre_commit +def test_pre_commit_hook_for_existing_changed_file(tmpdir): + # get the path and revision of this repo, to use with pre-commit + repo_root = str(Path(__file__).parent.parent.resolve()) + repo_rev = system("git", "rev-parse", "HEAD", cwd=repo_root).strip() + + git = git_in_tmpdir(tmpdir) + + # set up the tmpdir repo with pre-commit + pre_commit_config_yaml = dedent( + f""" + repos: + - repo: {repo_root} + rev: {repo_rev} + hooks: + - id: jupytext + args: [--from, ipynb, --to, "py:light"] + """ + ) + tmpdir.join(".pre-commit-config.yaml").write(pre_commit_config_yaml) + git("add", ".pre-commit-config.yaml") + with tmpdir.as_cwd(): + pre_commit(["install", "--install-hooks"]) + + # write test notebook and output file + nb = new_notebook(cells=[new_markdown_cell("A short notebook")]) + nb_file = str(tmpdir.join("test.ipynb")) + write(nb, nb_file) + py_file = str(tmpdir.join("test.py")) + jupytext(["--from", "ipynb", "--to", "py:light", str(nb_file)]) + + git("add", ".") + git("commit", "-m", "test") + + # make a change to the notebook + nb = new_notebook(cells=[new_markdown_cell("Some other text")]) + write(nb, nb_file) + + git("add", nb_file) + # now a commit will fail, and keep failing until we add the new + # changes made to the existing output to the index ourselves + with pytest.raises(SystemExit): + git("commit", "-m", "fails") + + with pytest.raises(SystemExit): + git("commit", "-m", "fails again") + + # once we add the changes, it will pass + git("add", py_file) + git("commit", "-m", "succeeds") + + assert "test.ipynb" in git("ls-files") + assert "test.py" in git("ls-files") diff --git a/tests/utils.py b/tests/utils.py index 1443862b3..a8f41ca1a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -63,6 +63,9 @@ def isort_version(): not is_myst_available(), reason="myst_parser not found" ) requires_no_myst = pytest.mark.skipif(is_myst_available(), reason="myst is available") +requires_pre_commit = pytest.mark.skipif( + not tool_version("pre-commit"), reason="pre-commit not found" +) skip_on_windows = pytest.mark.skipif(sys.platform.startswith("win"), reason="Issue 489")