diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..8be9fb60
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,71 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+repos:
+  #- repo: https://github.com/pre-commit/pre-commit-hooks
+  #  rev: v4.4.0
+  #  hooks:
+  #    - id: trailing-whitespace
+  #      exclude: |
+  #        (?x)^(
+  #          ^rapids-cmake/cpm/patches/.*
+  #        )
+  #    - id: end-of-file-fixer
+  #      exclude: |
+  #        (?x)^(
+  #          ^rapids-cmake/cpm/patches/.*
+  #        )
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v11.1.0
+    hooks:
+      - id: clang-format
+        types_or: [c, c++, cuda]
+        args: ["-fallback-style=none", "-style=file", "-i"]
+  #- repo: https://github.com/codespell-project/codespell
+  #  rev: v2.2.2
+  #  hooks:
+  #    - id: codespell
+  #      exclude: |
+  #        (?x)^(
+  #          ^CHANGELOG.md$
+  #        )
+  - repo: local
+    hooks:
+      - id: copyright-check
+        name: copyright-check
+        entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year
+        language: python
+        pass_filenames: false
+        additional_dependencies: [gitpython]
+      - id: cmake-format
+        name: cmake-format
+        entry: ./ci/checks/run-cmake-format.sh cmake-format
+        language: python
+        types: [cmake]
+        # NOTE: `pre-commit autoupdate` does not update the versions pinned in
+        # additional_dependencies, so cmakelang must be bumped here manually.
+        additional_dependencies:
+          - cmakelang==0.6.13
+        verbose: true
+        require_serial: true
+        files: |
+          (?x)^(
+            ^rapids-cmake/.*$
+          )
+      - id: cmake-lint
+        name: cmake-lint
+        entry: ./ci/checks/run-cmake-format.sh cmake-lint
+        language: python
+        types: [cmake]
+        # NOTE: `pre-commit autoupdate` does not update the versions pinned in
+        # additional_dependencies, so cmakelang must be bumped here manually.
+        additional_dependencies:
+          - cmakelang==0.6.13
+        verbose: true
+        require_serial: true
+        files: |
+          (?x)^(
+            ^rapids-cmake/.*$
+          )
+
+default_language_version:
+  python: python3
diff --git a/ci/check_style.sh b/ci/check_style.sh
index c109164e..82dc1b68 100755
--- a/ci/check_style.sh
+++ b/ci/check_style.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -13,62 +13,9 @@ rapids-dependency-file-generator \
 
 rapids-mamba-retry env create --force -f env.yaml -n checks
 
-set +eu
+set +u
 conda activate checks
 set -u
 
-
-CMAKE_FILES=(`find rapids-cmake/ | grep -E "^.*\.cmake?$|^.*/CMakeLists.txt$"`)
-CMAKE_FILES+=("CMakeLists.txt")
-
-CMAKE_FORMATS=()
-CMAKE_FORMAT_RETVAL=0
-
-CMAKE_LINTS=()
-CMAKE_LINT_RETVAL=0
-
-for cmake_file in "${CMAKE_FILES[@]}"; do
-  cmake-format --in-place --first-comment-is-literal --config-files ./cmake-format-rapids-cmake.json ./ci/checks/cmake_config_format.json -- ${cmake_file}
-  TMP_CMAKE_FORMAT=`git diff --color --exit-code -- ${cmake_file}`
-  TMP_CMAKE_FORMAT_RETVAL=$?
-  if [ "$TMP_CMAKE_FORMAT_RETVAL" != "0" ]; then
-    CMAKE_FORMAT_RETVAL=1
-    CMAKE_FORMATS+=("$TMP_CMAKE_FORMAT")
-  fi
-
-  TMP_CMAKE_LINT=`cmake-lint --config-files ./cmake-format-rapids-cmake.json ./ci/checks/cmake_config_format.json ./ci/checks/cmake_config_lint.json -- ${cmake_file}`
-  TMP_CMAKE_LINT_RETVAL=$?
-  if [ "$TMP_CMAKE_LINT_RETVAL" != "0" ]; then
-    CMAKE_LINT_RETVAL=1
-    CMAKE_LINTS+=("$TMP_CMAKE_LINT")
-  fi
-done
-
-# Output results if failure otherwise show pass
-if [ "$CMAKE_FORMAT_RETVAL" != "0" ]; then
-  echo -e "\n\n>>>> FAILED: cmake format check; begin output\n\n"
-  for CMAKE_FORMAT in "${CMAKE_FORMATS[@]}"; do
-    echo -e "$CMAKE_FORMAT"
-    echo -e "\n"
-  done
-  echo -e "\n\n>>>> FAILED: cmake format check; end output\n\n"
-else
-  echo -e "\n\n>>>> PASSED: cmake format check\n\n"
-fi
-
-if [ "$CMAKE_LINT_RETVAL" != "0" ]; then
-  echo -e "\n\n>>>> FAILED: cmake lint check; begin output\n\n"
-  for CMAKE_LINT in "${CMAKE_LINTS[@]}"; do
-    echo -e "$CMAKE_LINT"
-    echo -e "\n"
-  done
-  echo -e "\n\n>>>> FAILED: cmake lint check; end output\n\n"
-else
-  echo -e "\n\n>>>> PASSED: cmake lint check\n\n"
-fi
-
-RETVALS=($CMAKE_FORMAT_RETVAL $CMAKE_LINT_RETVAL)
-IFS=$'\n'
-RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1`
-
-exit $RETVAL
+# Run pre-commit checks
+pre-commit run --all-files --show-diff-on-failure
diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
new file mode 100644
index 00000000..ec13e654
--- /dev/null
+++ b/ci/checks/copyright.py
@@ -0,0 +1,310 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import datetime
+import os
+import re
+import sys
+
+import git
+
+FilesToCheck = [
+    re.compile(r"[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$"),
+    re.compile(r"CMakeLists[.]txt$"),
+    re.compile(r"meta[.]yaml$"),
+]
+ExemptFiles = []
+
+# this will break starting at year 10000, which is probably OK :)
+CheckSimple = re.compile(
+    r"Copyright *(?:\(c\))? *(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)"
+)
+CheckDouble = re.compile(
+    r"Copyright *(?:\(c\))? *(\d{4})-(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)"  # noqa: E501
+)
+
+
+def checkThisFile(f):
+    """Return True if ``f`` should have its copyright header checked.
+
+    ``f`` is either a ``git.Diff`` (its ``b_path`` is examined) or a
+    filesystem path. Deleted, empty, and nonexistent files are skipped, as
+    are paths matching ``ExemptFiles``; otherwise the path must match one of
+    the ``FilesToCheck`` patterns.
+    """
+    if isinstance(f, git.Diff):
+        if f.deleted_file or f.b_blob.size == 0:
+            return False
+        f = f.b_path
+    elif not os.path.exists(f) or os.stat(f).st_size == 0:
+        # This check covers things like symlinks which point to files that DNE
+        return False
+    for exempt in ExemptFiles:
+        if exempt.search(f):
+            return False
+    for checker in FilesToCheck:
+        if checker.search(f):
+            return True
+    return False
+
+
+def modifiedFiles():
+    """Get a set of all modified files, as Diff objects.
+
+    The files returned have been modified in git since the merge base of HEAD
+    and the upstream of the target branch. We return the Diff objects so that
+    we can read only the staged changes.
+    """
+    repo = git.Repo()
+    # Use the environment variable TARGET_BRANCH or RAPIDS_BASE_BRANCH (defined in CI) if possible
+    target_branch = os.environ.get("TARGET_BRANCH", os.environ.get("RAPIDS_BASE_BRANCH"))
+    if target_branch is None:
+        # Fall back to the closest branch if not on CI
+        target_branch = repo.git.describe(
+            all=True, tags=True, match="branch-*", abbrev=0
+        )
+        # str.lstrip("heads/") would strip any leading run of the characters
+        # "h", "e", "a", "d", "s", "/" rather than the prefix; be explicit.
+        if target_branch.startswith("heads/"):
+            target_branch = target_branch[len("heads/") :]
+
+    upstream_target_branch = None
+    if target_branch in repo.heads:
+        # Use the tracking branch of the local reference if it exists. This
+        # returns None if no tracking branch is set.
+        upstream_target_branch = repo.heads[target_branch].tracking_branch()
+    if upstream_target_branch is None:
+        # Fall back to the remote with the newest target_branch. This code
+        # path is used on CI because the only local branch reference is
+        # current-pr-branch, and thus target_branch is not in repo.heads.
+        # This also happens if no tracking branch is defined for the local
+        # target_branch. We use the remote with the latest commit if
+        # multiple remotes are defined.
+        candidate_branches = [
+            remote.refs[target_branch] for remote in repo.remotes
+            if target_branch in remote.refs
+        ]
+        if len(candidate_branches) > 0:
+            upstream_target_branch = sorted(
+                candidate_branches,
+                key=lambda branch: branch.commit.committed_datetime,
+            )[-1]
+        else:
+            # If no remotes are defined, try to use the local version of the
+            # target_branch. If this fails, the repo configuration must be very
+            # strange and we can fix this script on a case-by-case basis.
+            upstream_target_branch = repo.heads[target_branch]
+    merge_base = repo.merge_base("HEAD", upstream_target_branch.commit)[0]
+    diff = merge_base.diff()
+    changed_files = {f for f in diff if f.b_path is not None}
+    return changed_files
+
+
+def getCopyrightYears(line):
+    """Extract the (start, end) copyright years from ``line``.
+
+    A single-year header yields the same year twice. Returns
+    ``(None, None)`` if ``line`` contains no recognized copyright header.
+    """
+    res = CheckSimple.search(line)
+    if res:
+        return int(res.group(1)), int(res.group(1))
+    res = CheckDouble.search(line)
+    if res:
+        return int(res.group(1)), int(res.group(2))
+    return None, None
+
+
+def replaceCurrentYear(line, start, end):
+    """Return ``line`` with its copyright years rewritten to ``start-end``.
+
+    The header is also normalized to the canonical
+    ``Copyright (c) YYYY-YYYY, NVIDIA CORPORATION`` form.
+    """
+    # first turn a simple regex into double (if applicable). then update years
+    res = CheckSimple.sub(r"Copyright (c) \1-\1, NVIDIA CORPORATION", line)
+    res = CheckDouble.sub(
+        rf"Copyright (c) {start:04d}-{end:04d}, NVIDIA CORPORATION",
+        res,
+    )
+    return res
+
+
+def checkCopyright(f, update_current_year):
+    """Check ``f`` for a copyright header that covers the current year.
+
+    ``f`` is a ``git.Diff`` (the ``b_blob`` contents are checked) or a file
+    path. Returns a list of ``[path, line_number, message, replacement]``
+    errors, where ``replacement`` is the corrected line or ``None`` when a
+    manual fix is required. The list is emptied if any header in the file
+    already includes the current year. If ``update_current_year`` is set,
+    fixable lines are rewritten in the file at ``path``.
+    """
+    errs = []
+    thisYear = datetime.datetime.now().year
+    lineNum = 0
+    crFound = False
+    yearMatched = False
+
+    if isinstance(f, git.Diff):
+        path = f.b_path
+        lines = f.b_blob.data_stream.read().decode().splitlines(keepends=True)
+    else:
+        path = f
+        with open(f, encoding="utf-8") as fp:
+            lines = fp.readlines()
+
+    for line in lines:
+        lineNum += 1
+        start, end = getCopyrightYears(line)
+        if start is None:
+            continue
+        crFound = True
+        if start > end:
+            e = [
+                path,
+                lineNum,
+                "First year after second year in the copyright "
+                "header (manual fix required)",
+                None,
+            ]
+            errs.append(e)
+        elif thisYear < start or thisYear > end:
+            e = [
+                path,
+                lineNum,
+                "Current year not included in the copyright header",
+                None,
+            ]
+            if thisYear < start:
+                e[-1] = replaceCurrentYear(line, thisYear, end)
+            if thisYear > end:
+                e[-1] = replaceCurrentYear(line, start, thisYear)
+            errs.append(e)
+        else:
+            yearMatched = True
+    # copyright header itself not found
+    if not crFound:
+        e = [
+            path,
+            0,
+            "Copyright header missing or formatted incorrectly "
+            "(manual fix required)",
+            None,
+        ]
+        errs.append(e)
+    # even if the year matches a copyright header, make the check pass
+    if yearMatched:
+        errs = []
+
+    if update_current_year:
+        errs_update = [x for x in errs if x[-1] is not None]
+        if len(errs_update) > 0:
+            lines_changed = ", ".join(str(x[1]) for x in errs_update)
+            print(f"File: {path}. Changing line(s) {lines_changed}")
+            for _, lineNum, __, replacement in errs_update:
+                lines[lineNum - 1] = replacement
+            with open(path, "w", encoding="utf-8") as out_file:
+                out_file.writelines(lines)
+
+    return errs
+
+
+def getAllFilesUnderDir(root, pathFilter=None):
+    """Return the paths of all files under ``root``, searched recursively.
+
+    If ``pathFilter`` is given, only paths for which it returns a true value
+    are included; with the default of ``None`` every file is returned.
+    """
+    retList = []
+    for dirpath, _dirnames, filenames in os.walk(root):
+        for fn in filenames:
+            filePath = os.path.join(dirpath, fn)
+            if pathFilter is None or pathFilter(filePath):
+                retList.append(filePath)
+    return retList
+
+
+def checkCopyright_main():
+    """Command-line entry point; returns the process exit code.
+
+    With ``--git-modified-only``, checks the files returned by
+    ``modifiedFiles()``, whose target branch is taken from the
+    ``TARGET_BRANCH`` or ``RAPIDS_BASE_BRANCH`` environment variable when
+    set. Otherwise, checks every matching file under the directories given
+    as positional arguments. Returns 1 if any errors were found, else 0.
+    """
+    retVal = 0
+
+    argparser = argparse.ArgumentParser(
+        "Checks for a consistent copyright header in git's modified files"
+    )
+    argparser.add_argument(
+        "--update-current-year",
+        dest="update_current_year",
+        action="store_true",
+        required=False,
+        help="If set, "
+        "update the current year if a header is already "
+        "present and well formatted.",
+    )
+    argparser.add_argument(
+        "--git-modified-only",
+        dest="git_modified_only",
+        action="store_true",
+        required=False,
+        help="If set, "
+        "only files seen as modified by git will be "
+        "processed.",
+    )
+
+    args, dirs = argparser.parse_known_args()
+
+    if args.git_modified_only:
+        files = [f for f in modifiedFiles() if checkThisFile(f)]
+    else:
+        files = []
+        for d in [os.path.abspath(d) for d in dirs]:
+            if not os.path.isdir(d):
+                raise ValueError(f"{d} is not a directory.")
+            files += getAllFilesUnderDir(d, pathFilter=checkThisFile)
+
+    errors = []
+    for f in files:
+        errors += checkCopyright(f, args.update_current_year)
+
+    if len(errors) > 0:
+        if any(e[-1] is None for e in errors):
+            print("Copyright headers incomplete in some of the files!")
+        for e in errors:
+            print(" %s:%d Issue: %s" % (e[0], e[1], e[2]))
+        print("")
+        n_fixable = sum(1 for e in errors if e[-1] is not None)
+        path_parts = os.path.abspath(__file__).split(os.sep)
+        file_from_repo = os.sep.join(path_parts[path_parts.index("ci") :])
+        if n_fixable > 0 and not args.update_current_year:
+            print(
+                f"You can run `python {file_from_repo} --git-modified-only "
+                "--update-current-year` and stage the results in git to "
+                f"fix {n_fixable} of these errors.\n"
+            )
+        retVal = 1
+
+    return retVal
+
+
+if __name__ == "__main__":
+    sys.exit(checkCopyright_main())
diff --git a/ci/checks/run-cmake-format.sh b/ci/checks/run-cmake-format.sh
new file mode 100755
index 00000000..4b692ab6
--- /dev/null
+++ b/ci/checks/run-cmake-format.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+
+# This script is a wrapper for cmakelang that may be used with pre-commit. The
+# wrapping is necessary because RAPIDS libraries split configuration for
+# cmakelang linters between a local config file and a second config file that's
+# shared across all of RAPIDS via rapids-cmake. In order to keep it up to date
+# this file is only maintained in one place (the rapids-cmake repo) and
+# pulled down during builds. We need a way to invoke CMake linting commands
+# without causing pre-commit failures (which could block local commits or CI),
+# while also being sufficiently flexible to allow users to maintain the config
+# file independently of a build directory.
+#
+# This script provides the minimal functionality to enable those use cases. It
+# looks for the rapids-cmake config file at the root of this repository
+# and exits gracefully if the file is not found. If a user wishes to specify a
+# config file at a nonstandard location, they may do so by setting the
+# environment variable RAPIDS_CMAKE_FORMAT_FILE.
+#
+# This script can be invoked directly anywhere within the project repository.
+# Alternatively, it may be invoked as a pre-commit hook via
+# `pre-commit run (cmake-format)|(cmake-lint)`.
+#
+# Usage:
+# bash run-cmake-format.sh {cmake-format,cmake-lint} infile [infile ...]
+
+RAPIDS_CMAKE_ROOT="$(realpath "$(dirname "$0")/../..")"
+DEFAULT_RAPIDS_CMAKE_FORMAT_FILE="${RAPIDS_CMAKE_ROOT}/cmake-format-rapids-cmake.json"
+
+if [ -z "${RAPIDS_CMAKE_FORMAT_FILE:+PLACEHOLDER}" ]; then
+  RAPIDS_CMAKE_FORMAT_FILE="${DEFAULT_RAPIDS_CMAKE_FORMAT_FILE}"
+fi
+
+# The variable is always set by now, so test that the file actually exists.
+if [ ! -f "${RAPIDS_CMAKE_FORMAT_FILE}" ]; then
+  echo "The rapids-cmake cmake-format configuration file was not found at: "
+  echo ""
+  echo "${RAPIDS_CMAKE_FORMAT_FILE}"
+  echo ""
+  echo "Try setting the environment variable RAPIDS_CMAKE_FORMAT_FILE to the path to the config file."
+  exit 0
+fi
+echo "Using format file ${RAPIDS_CMAKE_FORMAT_FILE}"
+
+if [[ $1 == "cmake-format" ]]; then
+  # We cannot pass multiple input files because of a bug in cmake-format.
+  # See: https://github.com/cheshirekow/cmake_format/issues/284
+  for cmake_file in "${@:2}"; do
+    cmake-format --in-place --first-comment-is-literal --config-files "${RAPIDS_CMAKE_FORMAT_FILE}" "${RAPIDS_CMAKE_ROOT}/ci/checks/cmake_config_format.json" -- "${cmake_file}"
+  done
+elif [[ $1 == "cmake-lint" ]]; then
+  # Since the pre-commit hook is verbose, we have to be careful to only
+  # present cmake-lint's output (which is quite verbose) if we actually
+  # observe a failure.
+  OUTPUT=$(cmake-lint --config-files "${RAPIDS_CMAKE_FORMAT_FILE}" "${RAPIDS_CMAKE_ROOT}/ci/checks/cmake_config_format.json" "${RAPIDS_CMAKE_ROOT}/ci/checks/cmake_config_lint.json" -- "${@:2}")
+  status=$?
+
+  if [ "${status}" -ne 0 ]; then
+    echo "${OUTPUT}"
+  fi
+  exit ${status}
+fi
diff --git a/dependencies.yaml b/dependencies.yaml
index a2f57de3..446c744b 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -60,4 +60,4 @@ dependencies:
     common:
       - output_types: [conda, requirements]
         packages:
-          - cmakelang=0.6.13
+          - pre-commit