From 682dbfe3650c44a18a5d255317f6d0af3c0ed1ef Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Fri, 26 Jan 2024 14:20:09 +0100 Subject: [PATCH 1/3] ci-automation/garbage_collect.sh: min age, orphan removal This change improves the build cache garbage collector to remove orphaned artifact directories - i.e. directories to which no version tag exists in the scripts repo. SDK containers built by Github actions (using update_sdk_container) are ignored by this change because these are handled in a separate garbage collection script. Also, a new command line parameter has been added to remove artifacts older than the specified number of days (defaulting to 14): - If neither number of builds nor min age is specified, the script defaults to 50 builds to keep, and a min age of 14 days. The min age overrides the number of builds to keep, so more than 50 builds may be kept. - If only the number of builds to keep is specified, the min age is set to "0" (i.e. today). - If both are specified, min age again overrides number of builds to keep. Signed-off-by: Thilo Fromm --- ci-automation/garbage_collect.sh | 121 ++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 11 deletions(-) diff --git a/ci-automation/garbage_collect.sh b/ci-automation/garbage_collect.sh index 0bac4081ae..15bb5be855 100644 --- a/ci-automation/garbage_collect.sh +++ b/ci-automation/garbage_collect.sh @@ -8,10 +8,27 @@ # # garbage_collect() should be called after sourcing. # +# The garbage collector will remove artifacts of all NON-RELEASE versions from the build cache +# which BOTH +# * exceed the number of builds to keep (defaults to 50) +# AND +# * are older than the minimum purge age (14 days by default) +# +# Note that the min age threshold can lead to MORE than 50 builds being kept if this script +# is run with its default values. +# +# Additionally, the garbage collector will remove all artifacts and directories that do not have +# a version TAG in the scripts repository. 
+# # OPTIONAL INPUT # - Number of (recent) versions to keep. Defaults to 50. +# Explicitly setting this value will reset the minimum age (see below) to 0 days. +# - Minimum age of version tag to be purged, in days. Defaults to 14. # - PURGE_VERSIONS (Env variable). Space-separated list of versions to purge # instead of all but the 50 most recent ones. +# Setting this will IGNORE minimum age and number of versions to keep. +# NOTE that only dev versions (not official releases) may be specified. +# This is to prevent accidental deletion of official release tags from the git repo. # - DRY_RUN (Env variable). Set to "y" to just list what would be done but not # actually purge anything. @@ -38,26 +55,60 @@ function garbage_collect() { # -- function _garbage_collect_impl() { - local keep="${1:-50}" + local keep="${1:-}" + local min_age_days="${2:-}" local dry_run="${DRY_RUN:-}" local purge_versions="${PURGE_VERSIONS:-}" - local versions_detected="$(git tag -l --sort=-committerdate \ - | grep -E '(main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+\-.*' \ - | grep -vE '(-pro)$')" + # Set defaults; user-provided 'keep' has priority over default 'min_age_days' + if [ -n "${keep}" -a -z "${min_age_days}" ] ; then + min_age_days="0" + elif [ -z "${keep}" ] ; then + keep="50" + fi + if [ -z "${min_age_days}" ] ; then + min_age_days="14" + fi - echo "######## Full list of version(s) found ########" - echo "${versions_detected}" | awk '{printf "%5d %s\n", NR, $0}' + local min_age_date="$(date -d "${min_age_days} days ago" +'%Y-%m-%d')" + echo "######## Garbage collector starting ########" + echo + if [ -z "${purge_versions}" ] ; then + echo "Number of versions to keep: '${keep}'" + echo "Keep newer than: '${min_age_date}' (overrides number of versions to keep)" + fi + echo if [ -z "${purge_versions}" ] ; then - keep="$((keep + 1))" # for tail -n+... 
+ # Generate a list " | " from all repo tags that look like dev versions + local versions_detected="$(git tag -l --sort=-committerdate \ + --format="%(creatordate:format:%Y-%m-%d) | %(refname:strip=2)" \ + | grep -E '.*\| (main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+-.*' \ + | grep -vE '(-pro)$')" + + echo "######## Full list of version(s) and their creation dates ########" + echo + echo "${versions_detected}" | awk '{printf "%5d %s\n", NR, $0}' + + # Filter minimum number of versions to keep, min age purge_versions="$(echo "${versions_detected}" \ - | tail -n+"${keep}")" + | awk -v keep="${keep}" -v min_age="${min_age_date}" '{ + if (keep > 0) { + keep = keep - 1 + next + } + + if ($1 > min_age) + next + + print $3 + }')" else - # make sure we only accept dev versions + # User-provided version list, make sure we only accept dev versions purge_versions="$(echo "${purge_versions}" | sed 's/ /\n/g' \ | grep -E '(main|alpha|beta|stable|lts)-[0-9]+\.[0-9]+\.[0-9]+\-.*' \ | grep -vE '(-pro)$')" + keep=0 fi source ci-automation/ci_automation_common.sh @@ -71,7 +122,7 @@ function _garbage_collect_impl() { echo "(NOTE this is just a dry run since DRY_RUN=y)" echo fi - echo "${purge_versions}" | awk -v keep="${keep}" '{if ($0 == "") next; printf "%5d %s\n", NR + keep - 1, $0}' + echo "${purge_versions}" | awk '{if ($0 == "") next; printf "%5d %s\n", NR, $0}' echo echo @@ -90,7 +141,7 @@ function _garbage_collect_impl() { local os_docker_vernum="$(vernum_to_docker_image_version "${FLATCAR_VERSION}")" # Remove container image tarballs and SDK tarball (if applicable) - # + # Keep in sync with "orphaned direcrories" clean-up below. 
local rmpat="" rmpat="${BUILDCACHE_PATH_PREFIX}/sdk/*/${os_vernum}/" rmpat="${rmpat} ${BUILDCACHE_PATH_PREFIX}/containers/${os_docker_vernum}/flatcar-sdk-*" @@ -144,6 +195,54 @@ function _garbage_collect_impl() { fi done + echo + echo "########################################" + echo + echo Checking for orphaned directories + echo + + local dir="" + for dir in "sdk/amd64" \ + "containers" \ + "boards/amd64-usr" \ + "boards/arm64-usr" \ + "images/amd64" \ + "images/arm64" \ + "testing" \ + ; do + local fullpath="${BUILDCACHE_PATH_PREFIX}/${dir}" + echo + echo "## Processing '${fullpath}'" + echo "---------------------------" + local version="" + for version in $($sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" "ls -1 ${BUILDCACHE_PATH_PREFIX}/${dir}"); do + if [ "${dir}" = "containers" ] && echo "${version/+/-}" | grep -qE '.*-github-.*'; then + echo "Ignoring github CI SDK container in '${fullpath}/${version}'." + echo "Github CI SDK artifacts are handled by 'garbage_collect_github_ci_sdk.sh'" + echo " in a later step". + continue + fi + if ! git tag -l | grep -q "${version/+/-}"; then + local o_fullpath="${fullpath}/${version}" + echo + echo "## No tag '${version/+/-}' for orphan directory '${o_fullpath}'; removing." + echo "## The following files will be removed ##" + $sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \ + "ls -la ${o_fullpath} || true" + + if [ "$dry_run" != "y" ] ; then + set -x + $sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \ + "rm -rf ${o_fullpath} || true" + set +x + else + echo "## (DRY_RUN=y so not doing anything) ##" + fi + echo + fi + done + done + echo echo "########################################" echo From aaf9deac28c9222b235884b560630de4a2d70fda Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Fri, 26 Jan 2024 15:47:54 +0100 Subject: [PATCH 2/3] ci-automation/garbage_collect_github_ci_sdk.sh: add min_age parameter This change adds a min_age parameter to the github CI SDK garbage collector. 
The parameter specifies a minimum age (in days) for artifacts to be garbage collected. NOTE that this can result in more artifacts being kept than specified via the "keep" parameter if artifacts are younger than min_age. The change also has garbage_collect.sh pass the min_age parameter to garbage_collect_github_ci_sdk.sh. Signed-off-by: Thilo Fromm --- ci-automation/garbage_collect.sh | 4 ++- .../garbage_collect_github_ci_sdk.sh | 28 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/ci-automation/garbage_collect.sh b/ci-automation/garbage_collect.sh index 15bb5be855..3c7bb5a049 100644 --- a/ci-automation/garbage_collect.sh +++ b/ci-automation/garbage_collect.sh @@ -24,6 +24,8 @@ # - Number of (recent) versions to keep. Defaults to 50. # Explicitly setting this value will reset the minimum age (see below) to 0 days. # - Minimum age of version tag to be purged, in days. Defaults to 14. +# Only artifacts older than this AND exceeding the builds to keep threshold +# will be removed. # - PURGE_VERSIONS (Env variable). Space-separated list of versions to purge # instead of all but the 50 most recent ones. # Setting this will IGNORE minimum age and number of versions to keep. @@ -269,6 +271,6 @@ function _garbage_collect_impl() { echo source ci-automation/garbage_collect_github_ci_sdk.sh - garbage_collect_github_ci + garbage_collect_github_ci 1 "${min_age_days}" } # -- diff --git a/ci-automation/garbage_collect_github_ci_sdk.sh b/ci-automation/garbage_collect_github_ci_sdk.sh index 0d12cbe82a..ac11d66247 100644 --- a/ci-automation/garbage_collect_github_ci_sdk.sh +++ b/ci-automation/garbage_collect_github_ci_sdk.sh @@ -10,6 +10,9 @@ # # OPTIONAL INPUT # - Number of (recent) Github SDK builds to keep. Defaults to 20. +# - Minimum age of version tag to be purged, in days. Defaults to 14. +# Only artifacts older than this AND exceeding the builds to keep threshold +# will be removed. # - DRY_RUN (Env variable). 
Set to "y" to just list what would be done but not # actually purge anything. @@ -34,8 +37,10 @@ function garbage_collect_github_ci() { function _garbage_collect_github_ci_impl() { local keep="${1:-20}" + local min_age_days="${2:-14}" local dry_run="${DRY_RUN:-}" + local min_age_date="$(date -d "${min_age_days} days ago" +'%Y_%m_%d')" # Example version string # # @@ -49,15 +54,30 @@ function _garbage_collect_github_ci_impl() { # 3. remove the "/" local versions_sorted="$(echo "${versions_detected}" \ | sed 's/\(-github\(-pr-[0-9]*\)*-\)/\1\//' \ - | sort -k 2 -t / \ + | sort -k 2 -t / -r \ | sed 's:/::')" + echo + echo "Number of versions to keep: '${keep}'" + echo "Keep newer than: '${min_age_date}'" + echo + echo "######## Full list of version(s) found ########" echo "${versions_sorted}" | awk '{printf "%5d %s\n", NR, $0}' - local tail_keep="$((keep + 1))" # for tail -n+... local purge_versions - mapfile -t purge_versions < <(tail -n+"${tail_keep}" <<<"${versions_sorted}") + mapfile -t purge_versions < <(echo "${versions_sorted}" \ + | awk -v keep="${keep}" -v min_age="${min_age_date}" '{ + if (keep > 0) { + keep = keep - 1 + next + } + ts = gensub(".*-github-([0-9_]+)__.*","\\1","g",$1) + if (ts > min_age) + next + + print $1 + }') source ci-automation/ci_automation_common.sh local sshcmd="$(gen_sshcmd)" @@ -69,7 +89,7 @@ function _garbage_collect_github_ci_impl() { echo "(NOTE this is just a dry run since DRY_RUN=y)" echo fi - printf '%s\n' "${purge_versions[@]}" | awk -v keep="${keep}" '{if ($0 == "") next; printf "%5d %s\n", NR + keep, $0}' + printf '%s\n' "${purge_versions[@]}" | awk '{if ($0 == "") next; printf "%5d %s\n", NR, $0}' echo echo From 521d2be6047e49a855ace2a5aed2c0e310da64ca Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Mon, 29 Jan 2024 12:40:50 +0100 Subject: [PATCH 3/3] ci-automation/garbage_collect.sh: Fix typo Co-authored-by: Krzesimir Nowak --- ci-automation/garbage_collect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/ci-automation/garbage_collect.sh b/ci-automation/garbage_collect.sh index 3c7bb5a049..4ad8f4d3c4 100644 --- a/ci-automation/garbage_collect.sh +++ b/ci-automation/garbage_collect.sh @@ -143,7 +143,7 @@ function _garbage_collect_impl() { local os_docker_vernum="$(vernum_to_docker_image_version "${FLATCAR_VERSION}")" # Remove container image tarballs and SDK tarball (if applicable) - # Keep in sync with "orphaned direcrories" clean-up below. + # Keep in sync with "orphaned directories" clean-up below. local rmpat="" rmpat="${BUILDCACHE_PATH_PREFIX}/sdk/*/${os_vernum}/" rmpat="${rmpat} ${BUILDCACHE_PATH_PREFIX}/containers/${os_docker_vernum}/flatcar-sdk-*"