Skip to content

Commit f00f258

Browse files
authored
Improve cache handling (#1679)
Refactors cache handling to dedicated functions under `lib/cache.sh` (rather than being scattered around the buildpack), and makes the following improvements: - Ensures the cache is now also discarded when the package manager (or its version) changes. - Improves the build log output shown when restoring or discarding the cache. For example, if the cache was invalidated, all reasons are now shown. - Stops performing unnecessary cache file copies when the cache is due to be invalidated. This required moving the cache restoration step to after the `bin/pre_compile` hook runs. - Fixes cache restoration in the case where an app's `requirements.txt` was formerly a symlink. - Adds buildpack metrics for the status of the cache and duration of cache restoration/saving. Fixes #1673. Fixes #1674. Fixes #1675. Fixes #1676. Fixes #1677. Fixes #1678. Prep for #796. Unblocks upgrading pip (since #1674 prevents pypa/pip#12950). GUS-W-16811131.
1 parent dba9b86 commit f00f258

18 files changed

+322
-168
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
## [Unreleased]
44

5+
- Fixed cache handling so that it now also discards the cache when the package manager (or its version) changes. ([#1679](https://github.com/heroku/heroku-buildpack-python/pull/1679))
6+
- Improved the build log output shown when restoring or discarding the cache. For example, if the cache was invalidated all reasons are now shown. ([#1679](https://github.com/heroku/heroku-buildpack-python/pull/1679))
7+
- Stopped performing unnecessary cache file copies when the cache is due to be invalidated. This required moving the cache restoration step to after the `bin/pre_compile` hook runs. ([#1679](https://github.com/heroku/heroku-buildpack-python/pull/1679))
8+
- Fixed cache restoration in the case where an app's `requirements.txt` was formerly a symlink. ([#1679](https://github.com/heroku/heroku-buildpack-python/pull/1679))
9+
- Added buildpack metrics for the status of the cache and duration of cache restoration/saving. ([#1679](https://github.com/heroku/heroku-buildpack-python/pull/1679))
510

611
## [v262] - 2024-10-25
712

bin/compile

Lines changed: 6 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ ENV_DIR="${3}"
1919
BUILDPACK_DIR=$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)
2020

2121
source "${BUILDPACK_DIR}/bin/utils"
22+
source "${BUILDPACK_DIR}/lib/cache.sh"
2223
source "${BUILDPACK_DIR}/lib/hooks.sh"
2324
source "${BUILDPACK_DIR}/lib/metadata.sh"
2425
source "${BUILDPACK_DIR}/lib/output.sh"
@@ -93,56 +94,14 @@ export PIP_NO_PYTHON_VERSION_WARNING=1
9394

9495
cd "$BUILD_DIR"
9596

96-
# The Cache
97-
# ---------
98-
99-
# The workflow for the Python Buildpack's cache is as follows:
100-
#
101-
# - `~/.heroku/{known-paths}` are copied from the cache into the slug.
102-
# - The build is executed, modifying `~/.heroku/{known-paths}`.
103-
# - Once the build is complete, `~/.heroku/{known-paths}` is copied back into the cache.
104-
105-
mkdir -p "$CACHE_DIR/.heroku"
106-
107-
# Restore old artifacts from the cache.
108-
mkdir -p .heroku
109-
# The Python installation.
110-
cp -R "$CACHE_DIR/.heroku/python" .heroku/ &>/dev/null || true
111-
# A plain text file which contains the current stack being used (used for cache busting).
112-
cp -R "$CACHE_DIR/.heroku/python-stack" .heroku/ &>/dev/null || true
113-
# A plain text file which contains the current python version being used (used for cache busting).
114-
cp -R "$CACHE_DIR/.heroku/python-version" .heroku/ &>/dev/null || true
115-
# A plain text file which contains the current sqlite3 version being used (used for cache busting).
116-
cp -R "$CACHE_DIR/.heroku/python-sqlite3-version" .heroku/ &>/dev/null || true
117-
# "editable" installations of code repositories, via pip or pipenv.
118-
if [[ -d "$CACHE_DIR/.heroku/src" ]]; then
119-
cp -R "$CACHE_DIR/.heroku/src" .heroku/ &>/dev/null || true
120-
fi
121-
12297
# Runs a `bin/pre_compile` script if found in the app source, allowing build customisation.
12398
hooks::run_hook "pre_compile"
12499

125-
# TODO: Clear the cache if this isn't a valid version, as part of the cache refactor.
126-
# (Currently the version is instead validated in `read_requested_python_version()`)
127-
if [[ -f "$CACHE_DIR/.heroku/python-version" ]]; then
128-
cached_python_version="$(cat "${CACHE_DIR}/.heroku/python-version")"
129-
# `python-X.Y.Z` -> `X.Y`
130-
cached_python_version="${cached_python_version#python-}"
131-
else
132-
cached_python_version=
133-
fi
134-
135-
# We didn't always record the stack version.
136-
if [[ -f "$CACHE_DIR/.heroku/python-stack" ]]; then
137-
CACHED_PYTHON_STACK=$(cat "$CACHE_DIR/.heroku/python-stack")
138-
else
139-
# shellcheck disable=SC2154 # TODO: Env var is referenced but not assigned.
140-
CACHED_PYTHON_STACK=$STACK
141-
fi
142-
143100
package_manager="$(package_manager::determine_package_manager "${BUILD_DIR}")"
144101
meta_set "package_manager" "${package_manager}"
145102

103+
cached_python_version="$(cache::cached_python_version "${CACHE_DIR}")"
104+
146105
# We use the Bash 4.3+ `nameref` feature to pass back multiple values from this function
147106
# without having to hardcode globals. See: https://stackoverflow.com/a/38997681
148107
python_version::read_requested_python_version "${BUILD_DIR}" "${package_manager}" "${cached_python_version}" requested_python_version python_version_origin
@@ -170,6 +129,8 @@ python_major_version="${python_full_version%.*}"
170129
meta_set "python_version" "${python_full_version}"
171130
meta_set "python_version_major" "${python_major_version}"
172131

132+
cache::restore "${BUILD_DIR}" "${CACHE_DIR}" "${STACK:?}" "${cached_python_version}" "${python_full_version}" "${package_manager}"
133+
173134
# The directory for the .profile.d scripts.
174135
mkdir -p "$(dirname "$PROFILE_PATH")"
175136
# The directory for editable VCS dependencies.
@@ -300,18 +261,6 @@ cp "${BUILDPACK_DIR}/vendor/python.gunicorn.sh" "$GUNICORN_PROFILE_PATH"
300261
# Runs a `bin/post_compile` script if found in the app source, allowing build customisation.
301262
hooks::run_hook "post_compile"
302263

303-
# Store new artifacts in the cache.
304-
rm -rf "$CACHE_DIR/.heroku/python"
305-
rm -rf "$CACHE_DIR/.heroku/python-version"
306-
rm -rf "$CACHE_DIR/.heroku/python-stack"
307-
rm -rf "$CACHE_DIR/.heroku/src"
308-
309-
mkdir -p "$CACHE_DIR/.heroku"
310-
cp -R .heroku/python "$CACHE_DIR/.heroku/"
311-
cp -R .heroku/python-version "$CACHE_DIR/.heroku/"
312-
cp -R .heroku/python-stack "$CACHE_DIR/.heroku/" &>/dev/null || true
313-
if [[ -d .heroku/src ]]; then
314-
cp -R .heroku/src "$CACHE_DIR/.heroku/" &>/dev/null || true
315-
fi
264+
cache::save "${BUILD_DIR}" "${CACHE_DIR}" "${STACK}" "${python_full_version}" "${package_manager}"
316265

317266
meta_time "total_duration" "${compile_start_time}"

bin/report

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ kv_pair_string() {
6060
}
6161

6262
STRING_FIELDS=(
63+
cache_status
6364
django_collectstatic
6465
failure_reason
6566
nltk_downloader
@@ -76,6 +77,8 @@ STRING_FIELDS=(
7677

7778
# We don't want to quote numeric or boolean fields.
7879
ALL_OTHER_FIELDS=(
80+
cache_restore_duration
81+
cache_save_duration
7982
dependencies_install_duration
8083
django_collectstatic_duration
8184
nltk_downloader_duration

bin/steps/python

Lines changed: 22 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#!/usr/bin/env bash
22
# shellcheck disable=SC2154 # TODO: Env var is referenced but not assigned.
3-
# shellcheck disable=SC2250 # TODO: Use braces around variable references even when not strictly required.
43

54
set -euo pipefail
65

@@ -27,6 +26,28 @@ if ! curl --output /dev/null --silent --head --fail --retry 3 --retry-connrefuse
2726
exit 1
2827
fi
2928

29+
if [[ -f "${BUILD_DIR}/.heroku/python/bin/python" ]]; then
30+
output::step "Using cached install of Python ${python_full_version}"
31+
else
32+
output::step "Installing Python ${python_full_version}"
33+
mkdir -p "${BUILD_DIR}/.heroku/python"
34+
35+
if ! curl --silent --show-error --fail --retry 3 --retry-connrefused --connect-timeout 10 "${PYTHON_URL}" | tar --zstd --extract --directory "${BUILD_DIR}/.heroku/python"; then
36+
# The Python version was confirmed to exist previously, so any failure here is due to
37+
# a networking issue or archive/buildpack bug rather than the runtime not existing.
38+
output::error <<-EOF
39+
Error: Failed to download/install Python ${python_full_version}.
40+
41+
In some cases, this happens due to an unstable network connection.
42+
Please try again and to see if the error resolves itself.
43+
EOF
44+
meta_set "failure_reason" "python-download"
45+
exit 1
46+
fi
47+
48+
hash -r
49+
fi
50+
3051
function warn_if_patch_update_available() {
3152
local requested_full_version="${1}"
3253
local requested_major_version="${2}"
@@ -69,69 +90,3 @@ if [[ "${python_major_version}" == "3.8" ]]; then
6990
fi
7091

7192
warn_if_patch_update_available "${python_full_version}" "${python_major_version}"
72-
73-
if [[ "$STACK" != "$CACHED_PYTHON_STACK" ]]; then
74-
output::step "Stack has changed from $CACHED_PYTHON_STACK to $STACK, clearing cache"
75-
rm -rf .heroku/python-stack .heroku/python-version .heroku/python .heroku/vendor .heroku/python .heroku/python-sqlite3-version
76-
fi
77-
78-
# TODO: Clean this up as part of the cache refactor.
79-
if [[ -f .heroku/python-version ]]; then
80-
if [[ "${cached_python_version}" != "${python_full_version}" ]]; then
81-
output::step "Python version has changed from ${cached_python_version} to ${python_full_version}, clearing cache"
82-
rm -rf .heroku/python
83-
else
84-
SKIP_INSTALL=1
85-
fi
86-
fi
87-
88-
# If using pip, check if we should reinstall python dependencies given that requirements.txt
89-
# is non-deterministic (not all packages pinned, doesn't handle uninstalls etc). We don't need
90-
# to do this when using Pipenv, since it has a lockfile and syncs the packages for us.
91-
if [[ -f "${BUILD_DIR}/requirements.txt" ]]; then
92-
if [[ ! -f "$CACHE_DIR/.heroku/requirements.txt" ]]; then
93-
# This is the first build of an app (or the build cache was cleared). Since there
94-
# are no cached packages, we only need to store the requirements file for next time.
95-
cp -R "$BUILD_DIR/requirements.txt" "$CACHE_DIR/.heroku/requirements.txt"
96-
else
97-
# IF there IS a cached directory, check for differences with the new one
98-
if ! diff "$BUILD_DIR/requirements.txt" "$CACHE_DIR/.heroku/requirements.txt" &>/dev/null; then
99-
output::step "Requirements file has been changed, clearing cached dependencies"
100-
# if there are any differences, clear the Python cache
101-
# Installing Python over again does not take noticeably more time
102-
cp -R "$BUILD_DIR/requirements.txt" "$CACHE_DIR/.heroku/requirements.txt"
103-
rm -rf .heroku/python
104-
unset SKIP_INSTALL
105-
else
106-
output::step "No change in requirements detected, installing from cache"
107-
fi
108-
fi
109-
fi
110-
111-
if [[ "${SKIP_INSTALL:-0}" == "1" ]]; then
112-
output::step "Using cached install of Python ${python_full_version}"
113-
else
114-
output::step "Installing Python ${python_full_version}"
115-
116-
# Prepare destination directory.
117-
mkdir -p .heroku/python
118-
119-
if ! curl --silent --show-error --fail --retry 3 --retry-connrefused --connect-timeout 10 "${PYTHON_URL}" | tar --zstd --extract --directory .heroku/python; then
120-
# The Python version was confirmed to exist previously, so any failure here is due to
121-
# a networking issue or archive/buildpack bug rather than the runtime not existing.
122-
output::error <<-EOF
123-
Error: Failed to download/install Python ${python_full_version}.
124-
125-
In some cases, this happens due to an unstable network connection.
126-
Please try again and to see if the error resolves itself.
127-
EOF
128-
meta_set "failure_reason" "python-download"
129-
exit 1
130-
fi
131-
132-
# Record for future reference.
133-
echo "python-${python_full_version}" >.heroku/python-version
134-
echo "$STACK" >.heroku/python-stack
135-
136-
hash -r
137-
fi

bin/steps/sqlite3

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,4 @@ buildpack_sqlite3_install() {
7878
echo "Sqlite3 failed to install."
7979
# mcount "failure.python.sqlite3"
8080
fi
81-
82-
# shellcheck disable=SC2154 # TODO: Env var is referenced but not assigned.
83-
mkdir -p "$CACHE_DIR/.heroku/"
8481
}

0 commit comments

Comments
 (0)