Merge pull request #1608 from flatcar/t-lo/garbage-collect-by-date-remove-orphans

ci-automation/garbage_collect.sh: Add min age, remove orphan directories
This commit is contained in:
Thilo Fromm 2024-01-29 15:20:51 +01:00 committed by GitHub
commit 4f10dd9b06
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 137 additions and 16 deletions

View File

@ -8,10 +8,29 @@
#
# garbage_collect() should be called after sourcing.
#
# The garbage collector will remove artifacts of all NON-RELEASE versions from the build cache
# which BOTH
# * exceed the number of builds to keep (defaults to 50)
# AND
# * are older than the minimum purge age (14 days by default)
#
# Note that the min age threshold can lead to MORE than 50 builds being kept if this script
# is run with its default values.
#
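# Additionally, the garbage collector will remove all artifacts and directories that do not have
# a version TAG in the scripts repository ("orphaned" directories).
# Github CI SDK container directories are excluded from this orphan clean-up; those are handled
# by 'garbage_collect_github_ci_sdk.sh'.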
#
# OPTIONAL INPUT
# - Number of (recent) versions to keep. Defaults to 50.
# Explicitly setting this value WITHOUT also passing a minimum age (see below)
# will reset the minimum age to 0 days.
# - Minimum age of version tag to be purged, in days. Defaults to 14.
# Only artifacts older than this AND exceeding the builds to keep threshold
# will be removed.
# - PURGE_VERSIONS (Env variable). Space-separated list of versions to purge
# instead of all but the 50 most recent ones.
# Setting this will IGNORE minimum age and number of versions to keep.
# NOTE that only dev versions (not official releases) may be specified.
# This is to prevent accidental deletion of official release tags from the git repo.
# - DRY_RUN (Env variable). Set to "y" to just list what would be done but not
# actually purge anything.
@ -38,26 +57,60 @@ function garbage_collect() {
# --
function _garbage_collect_impl() {
local keep="${1:-50}"
local keep="${1:-}"
local min_age_days="${2:-}"
local dry_run="${DRY_RUN:-}"
local purge_versions="${PURGE_VERSIONS:-}"
local versions_detected="$(git tag -l --sort=-committerdate \
| grep -E '(main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+\-.*' \
| grep -vE '(-pro)$')"
# Set defaults; user-provided 'keep' has priority over default 'min_age_days'
if [ -n "${keep}" -a -z "${min_age_days}" ] ; then
min_age_days="0"
elif [ -z "${keep}" ] ; then
keep="50"
fi
if [ -z "${min_age_days}" ] ; then
min_age_days="14"
fi
echo "######## Full list of version(s) found ########"
echo "${versions_detected}" | awk '{printf "%5d %s\n", NR, $0}'
local min_age_date="$(date -d "${min_age_days} days ago" +'%Y-%m-%d')"
echo "######## Garbage collector starting ########"
echo
if [ -z "${purge_versions}" ] ; then
echo "Number of versions to keep: '${keep}'"
echo "Keep newer than: '${min_age_date}' (overrides number of versions to keep)"
fi
echo
if [ -z "${purge_versions}" ] ; then
keep="$((keep + 1))" # for tail -n+...
# Generate a list "<timestamp> | <tagname>" from all repo tags that look like dev versions
local versions_detected="$(git tag -l --sort=-committerdate \
--format="%(creatordate:format:%Y-%m-%d) | %(refname:strip=2)" \
| grep -E '.*\| (main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+-.*' \
| grep -vE '(-pro)$')"
echo "######## Full list of version(s) and their creation dates ########"
echo
echo "${versions_detected}" | awk '{printf "%5d %s\n", NR, $0}'
# Filter minimum number of versions to keep, min age
purge_versions="$(echo "${versions_detected}" \
| tail -n+"${keep}")"
| awk -v keep="${keep}" -v min_age="${min_age_date}" '{
if (keep > 0) {
keep = keep - 1
next
}
if ($1 > min_age)
next
print $3
}')"
else
# make sure we only accept dev versions
# User-provided version list, make sure we only accept dev versions
purge_versions="$(echo "${purge_versions}" | sed 's/ /\n/g' \
| grep -E '(main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+\-.*' \
| grep -vE '(-pro)$')"
keep=0
fi
source ci-automation/ci_automation_common.sh
@ -71,7 +124,7 @@ function _garbage_collect_impl() {
echo "(NOTE this is just a dry run since DRY_RUN=y)"
echo
fi
echo "${purge_versions}" | awk -v keep="${keep}" '{if ($0 == "") next; printf "%5d %s\n", NR + keep - 1, $0}'
echo "${purge_versions}" | awk '{if ($0 == "") next; printf "%5d %s\n", NR, $0}'
echo
echo
@ -90,7 +143,7 @@ function _garbage_collect_impl() {
local os_docker_vernum="$(vernum_to_docker_image_version "${FLATCAR_VERSION}")"
# Remove container image tarballs and SDK tarball (if applicable)
#
# Keep in sync with "orphaned directories" clean-up below.
local rmpat=""
rmpat="${BUILDCACHE_PATH_PREFIX}/sdk/*/${os_vernum}/"
rmpat="${rmpat} ${BUILDCACHE_PATH_PREFIX}/containers/${os_docker_vernum}/flatcar-sdk-*"
@ -144,6 +197,54 @@ function _garbage_collect_impl() {
fi
done
echo
echo "########################################"
echo
echo Checking for orphaned directories
echo
local dir=""
for dir in "sdk/amd64" \
"containers" \
"boards/amd64-usr" \
"boards/arm64-usr" \
"images/amd64" \
"images/arm64" \
"testing" \
; do
local fullpath="${BUILDCACHE_PATH_PREFIX}/${dir}"
echo
echo "## Processing '${fullpath}'"
echo "---------------------------"
local version=""
for version in $($sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" "ls -1 ${BUILDCACHE_PATH_PREFIX}/${dir}"); do
if [ "${dir}" = "containers" ] && echo "${version/+/-}" | grep -qE '.*-github-.*'; then
echo "Ignoring github CI SDK container in '${fullpath}/${version}'."
echo "Github CI SDK artifacts are handled by 'garbage_collect_github_ci_sdk.sh'"
echo " in a later step".
continue
fi
if ! git tag -l | grep -q "${version/+/-}"; then
local o_fullpath="${fullpath}/${version}"
echo
echo "## No tag '${version/+/-}' for orphan directory '${o_fullpath}'; removing."
echo "## The following files will be removed ##"
$sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \
"ls -la ${o_fullpath} || true"
if [ "$dry_run" != "y" ] ; then
set -x
$sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \
"rm -rf ${o_fullpath} || true"
set +x
else
echo "## (DRY_RUN=y so not doing anything) ##"
fi
echo
fi
done
done
echo
echo "########################################"
echo
@ -170,6 +271,6 @@ function _garbage_collect_impl() {
echo
source ci-automation/garbage_collect_github_ci_sdk.sh
garbage_collect_github_ci
garbage_collect_github_ci 1 "${min_age_days}"
}
# --

View File

@ -10,6 +10,9 @@
#
# OPTIONAL INPUT
# - Number of (recent) Github SDK builds to keep. Defaults to 20.
# - Minimum age of Github SDK build to be purged, in days. Defaults to 14.
# The age is derived from the timestamp embedded in the build's version string.
# Only artifacts older than this AND exceeding the builds to keep threshold
# will be removed.
# - DRY_RUN (Env variable). Set to "y" to just list what would be done but not
# actually purge anything.
@ -34,8 +37,10 @@ function garbage_collect_github_ci() {
function _garbage_collect_github_ci_impl() {
local keep="${1:-20}"
local min_age_days="${2:-14}"
local dry_run="${DRY_RUN:-}"
local min_age_date="$(date -d "${min_age_days} days ago" +'%Y_%m_%d')"
# Example version string
# <a href="./3598.0.0-nightly-20230508-2100-github-2023_05_09__08_06_54/">
# <a href="./3598.0.0-nightly-20230508-2100-github-pr-12345-2023_05_09__08_06_54/">
@ -49,15 +54,30 @@ function _garbage_collect_github_ci_impl() {
# 3. remove the "/"
local versions_sorted="$(echo "${versions_detected}" \
| sed 's/\(-github\(-pr-[0-9]*\)*-\)/\1\//' \
| sort -k 2 -t / \
| sort -k 2 -t / -r \
| sed 's:/::')"
echo
echo "Number of versions to keep: '${keep}'"
echo "Keep newer than: '${min_age_date}'"
echo
echo "######## Full list of version(s) found ########"
echo "${versions_sorted}" | awk '{printf "%5d %s\n", NR, $0}'
local tail_keep="$((keep + 1))" # for tail -n+...
local purge_versions
mapfile -t purge_versions < <(tail -n+"${tail_keep}" <<<"${versions_sorted}")
mapfile -t purge_versions < <(echo "${versions_sorted}" \
| awk -v keep="${keep}" -v min_age="${min_age_date}" '{
if (keep > 0) {
keep = keep - 1
next
}
ts = gensub(".*-github-([0-9_]+)__.*","\\1","g",$1)
if (ts > min_age)
next
print $1
}')
source ci-automation/ci_automation_common.sh
local sshcmd="$(gen_sshcmd)"
@ -69,7 +89,7 @@ function _garbage_collect_github_ci_impl() {
echo "(NOTE this is just a dry run since DRY_RUN=y)"
echo
fi
printf '%s\n' "${purge_versions[@]}" | awk -v keep="${keep}" '{if ($0 == "") next; printf "%5d %s\n", NR + keep, $0}'
printf '%s\n' "${purge_versions[@]}" | awk '{if ($0 == "") next; printf "%5d %s\n", NR, $0}'
echo
echo