From 8bc10465e0471c78dff3c66c5e6579937cb062bb Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Tue, 30 Jan 2024 09:12:31 +0100 Subject: [PATCH 1/6] ci-automation/garbage_collect*: unset POSIXLY_CORRECT for awk gensub is a GNU extension; however, POSIXLY_CORRECT is enforced in systemd-run which triggers gawk's traditional / posix mode. Unset it before calling gawk to make gensub available. Signed-off-by: Thilo Fromm --- ci-automation/garbage_collect_github_ci_sdk.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci-automation/garbage_collect_github_ci_sdk.sh b/ci-automation/garbage_collect_github_ci_sdk.sh index ac11d66247..58ace907bc 100644 --- a/ci-automation/garbage_collect_github_ci_sdk.sh +++ b/ci-automation/garbage_collect_github_ci_sdk.sh @@ -66,7 +66,8 @@ function _garbage_collect_github_ci_impl() { echo "${versions_sorted}" | awk '{printf "%5d %s\n", NR, $0}' local purge_versions - mapfile -t purge_versions < <(echo "${versions_sorted}" \ + mapfile -t purge_versions < <(unset POSIXLY_CORRECT; \ + echo "${versions_sorted}" \ | awk -v keep="${keep}" -v min_age="${min_age_date}" '{ if (keep > 0) { keep = keep - 1 From 2d5c17cb3c2a4b777b2be62d56d8ec0b15a5f267 Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Tue, 30 Jan 2024 13:38:22 +0100 Subject: [PATCH 2/6] ci-automation/garbage_collect: add cached release artifacts This change extends the garbage collector for the build cache server to remove cached release artifacts. Release artifacts are copied to the official mirrors and do not need to remain on the build cache after a release was published. By default, the 10 latest releases of all channels (including LTS and previous LTS) are kept. Also excluded from garbage collection are: - Emerging new major releases (i.e. 
major number larger than the latest Alpha release) - channel progressions (major number exists in the lists of releases to keep but minor is bigger than any release) - patch releases (major and minor exist in list of releases to keep but patch level is newer than in any release) - SDKs (tarballs and containers) of any release in the list of releases to keep; i.e. the SDK in .0.0 for any release to keep. Signed-off-by: Thilo Fromm --- ci-automation/ci-config.env | 1 + ci-automation/garbage_collect.sh | 8 + .../garbage_collect_github_ci_sdk.sh | 9 +- ci-automation/garbage_collect_releases.sh | 180 ++++++++++++++++++ 4 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 ci-automation/garbage_collect_releases.sh diff --git a/ci-automation/ci-config.env b/ci-automation/ci-config.env index 4b44ddd547..6c177bb4c5 100644 --- a/ci-automation/ci-config.env +++ b/ci-automation/ci-config.env @@ -14,6 +14,7 @@ BUILDCACHE_USER="bincache" RELEASES_SERVER="mirror.release.flatcar-linux.net" CONTAINER_REGISTRY="ghcr.io/flatcar" +RELEASES_JSON_FEED="https://www.flatcar.org/releases-json/releases.json" GC_BUCKET="flatcar-linux" diff --git a/ci-automation/garbage_collect.sh b/ci-automation/garbage_collect.sh index 4ad8f4d3c4..a84e24c201 100644 --- a/ci-automation/garbage_collect.sh +++ b/ci-automation/garbage_collect.sh @@ -272,5 +272,13 @@ function _garbage_collect_impl() { source ci-automation/garbage_collect_github_ci_sdk.sh garbage_collect_github_ci 1 "${min_age_days}" + + echo + echo "########################################" + echo + echo Running Release Artifacts cache garbage collector + echo + source ci-automation/garbage_collect_releases.sh + garbage_collect_releases } # -- diff --git a/ci-automation/garbage_collect_github_ci_sdk.sh b/ci-automation/garbage_collect_github_ci_sdk.sh index 58ace907bc..6a05ca3174 100644 --- a/ci-automation/garbage_collect_github_ci_sdk.sh +++ b/ci-automation/garbage_collect_github_ci_sdk.sh @@ -41,10 +41,14 @@ function 
_garbage_collect_github_ci_impl() { local dry_run="${DRY_RUN:-}" local min_age_date="$(date -d "${min_age_days} days ago" +'%Y_%m_%d')" + + source ci-automation/ci_automation_common.sh + local sshcmd="$(gen_sshcmd)" + # Example version string # # - local versions_detected="$(curl -s https://bincache.flatcar-linux.net/containers/ \ + local versions_detected="$(curl -s https://${BUILDCACHE_SERVER}/containers/ \ | grep -E '\' \ | sed 's:.*\"./\([^/]\+\)/".*:\1:' )" @@ -80,9 +84,6 @@ function _garbage_collect_github_ci_impl() { print $1 }') - source ci-automation/ci_automation_common.sh - local sshcmd="$(gen_sshcmd)" - echo echo "######## The following version(s) will be purged ########" if [ "$dry_run" = "y" ] ; then diff --git a/ci-automation/garbage_collect_releases.sh b/ci-automation/garbage_collect_releases.sh new file mode 100644 index 0000000000..907dfb0673 --- /dev/null +++ b/ci-automation/garbage_collect_releases.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# +# Copyright (c) 2021 The Flatcar Maintainers. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# >>> This file is supposed to be SOURCED from the repository ROOT. <<< +# +# garbage_collect_releases() should be called after sourcing. +# +# OPTIONAL INPUT +# - Number releases to keep per channel. Defaults to 10. +# - Number of LTS channels to keep. Defaults to 3 (i.e. the current and the previous (deprecated) LTS). +# - DRY_RUN (Env variable). Set to "y" to just list what would be done but not +# actually purge anything. + +# Flatcar build cache releases artifacts garbage collector. +# This script removes release artifacts of past releases from the build cache. +# Note that release artifacts are copied to official mirrors upon release, so there's +# no need to keep a copy on the build cache server. + +function garbage_collect_releases() { + # Run a subshell, so the traps, environment changes and global + # variables are not spilled into the caller. 
+ ( + set -euo pipefail + + _garbage_collect_releases_impl "${@}" + ) +} +# -- + +function _garbage_collect_releases_impl() { + local keep_per_chan="${1:-10}" + local keep_lts_releases="${2:-2}" + local dry_run="${DRY_RUN:-}" + + echo + echo "Number of versions to keep per channel: '${keep_per_chan}'" + echo "Number of LTS major releases to keep: '${keep_lts_releases}'" + echo + + source ci-automation/ci_automation_common.sh + local sshcmd="$(gen_sshcmd)" + + local keep_versions + mapfile -t keep_versions < <(unset POSIXLY_CORRECT; \ + curl -s "${RELEASES_JSON_FEED}" \ + | jq '[keys]' \ + | sed -n 's/.*"\([0-9]\+\.[0-9]\+\.[0-9]\+\)".*/\1/p' \ + | sort -Vr \ + | awk -v keep="${keep_per_chan}" -v lts="${keep_lts_releases}" ' + { + version = $1 + chan_num = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", version) + 0 + major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", version) + 0 + + if (chan_num <= 2) { + if (chan_count[chan_num] < keep) + print version + chan_count[chan_num] = chan_count[chan_num] + 1 + } else { + if ( (chan_count["lts"][major] < keep) \ + && (length(chan_count["lts"]) <= lts) ) + print version + chan_count["lts"][major] = chan_count["lts"][major] + 1 + } + } ') + + echo + echo "######## The following version(s) will be kept ########" + if [ "$dry_run" = "y" ] ; then + echo + echo "(NOTE this is just a dry run since DRY_RUN=y)" + echo + fi + printf "%s\n" "${keep_versions[@]}" + + local dir="" + for dir in "sdk/amd64" \ + "containers" \ + "boards/amd64-usr" \ + "boards/arm64-usr" \ + "images/amd64" \ + "images/arm64" \ + "testing" \ + ; do + + local fullpath="${BUILDCACHE_PATH_PREFIX}/${dir}" + echo + echo "## Processing '${fullpath}'" + echo "---------------------------" + for version in $($sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \ + "ls -1 ${BUILDCACHE_PATH_PREFIX}/${dir} | grep -E '^[0-9]+\.[0-9]+\.[0-9]+$'"); do + local o_fullpath="${fullpath}/${version}" + + # skip if version is marked for keeping OR if it's a new release about 
to be published + if printf "%s\n" "${keep_versions[@]}" \ + | { unset POSIXLY_CORRECT ; awk -v version="${version}" -v path="${dir}" ' + BEGIN { + vmajor = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", version) + 0 + vminor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", version) + 0 + vpatch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g", version) + 0 + ret = 1 + } + + { + if ($0 == version) { + print "" + print "## Skipping " version " because it is in the keep list." + ret = 0 + exit + } + + major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g") + 0 + minor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g") + 0 + patch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g") + 0 + + if ( ((path == "sdk/amd64") || (path == "containers")) \ + && (vmajor == major) && (vminor == 0) && (vpatch == 0) ) { + print "" + print "## Skipping " version " in " path " because it contains the SDK for release " $0 " in keep list." + ret = 0 + exit + } + + if (major_alpha == "") + major_alpha = major + + if (vmajor > major_alpha) { + print "" + print "## Skipping " version " because major version is higher than the latest Alpha (" major_alpha ") in keep list." + print "(I.e. this is an unpublished new Alpha release)" + ret = 0 + exit + } + + if ((vmajor == major) && (vminor > minor)) { + print "" + print "## Skipping " version " because major version is in keep list and minor version is higher than the latest release." + print "(I.e. this is an unpublished channel progression " $0 " -> " version ")" + ret = 0 + exit + } + + if ((vmajor == major) && (vminor == minor) && (vpatch > patch)) { + print "" + print "## Skipping " version " because major and minor versions are in keep list and patch version is higher than the latest release." + print "(I.e. 
this is an unpublished new patch release " $0 " -> " version ")" + ret = 0 + exit + } + } + + END { + exit ret + }' ; } then + continue + fi + + echo + echo "## Removing version '${version}' in '${o_fullpath}'" + echo + + echo "## The following files will be removed ##" + $sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \ + "ls -la ${o_fullpath} || true" + + if [ "$dry_run" != "y" ] ; then + set -x + $sshcmd "${BUILDCACHE_USER}@${BUILDCACHE_SERVER}" \ + "rm -rf ${o_fullpath} || true" + set +x + else + echo "## (DRY_RUN=y so not doing anything) ##" + fi + done + done +} +# -- From 9359a832996f560754232f82457811c9854b5266 Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Mon, 12 Feb 2024 16:07:40 +0100 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Krzesimir Nowak --- ci-automation/garbage_collect_releases.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ci-automation/garbage_collect_releases.sh b/ci-automation/garbage_collect_releases.sh index 907dfb0673..988d572d9a 100644 --- a/ci-automation/garbage_collect_releases.sh +++ b/ci-automation/garbage_collect_releases.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2021 The Flatcar Maintainers. +# Copyright (c) 2024 The Flatcar Maintainers. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -10,7 +10,7 @@ # # OPTIONAL INPUT # - Number releases to keep per channel. Defaults to 10. -# - Number of LTS channels to keep. Defaults to 3 (i.e. the current and the previous (deprecated) LTS). +# - Number of LTS channels to keep. Defaults to 2 (i.e. the current and the previous (deprecated) LTS). # - DRY_RUN (Env variable). Set to "y" to just list what would be done but not # actually purge anything. 
@@ -46,8 +46,7 @@ function _garbage_collect_releases_impl() { local keep_versions mapfile -t keep_versions < <(unset POSIXLY_CORRECT; \ curl -s "${RELEASES_JSON_FEED}" \ - | jq '[keys]' \ - | sed -n 's/.*"\([0-9]\+\.[0-9]\+\.[0-9]\+\)".*/\1/p' \ + | jq -r 'keys_unsorted | .[] | match("[0-9]+\\.[0-9]+\\.[0-9]+") | .string' \ | sort -Vr \ | awk -v keep="${keep_per_chan}" -v lts="${keep_lts_releases}" ' { From 88857b496a2d801a7c9dc06f33f88793653992c9 Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Mon, 12 Feb 2024 17:51:32 +0100 Subject: [PATCH 4/6] garbage_collect_releases: fix silent fail, keep all relevant SDKs Signed-off-by: Thilo Fromm --- ci-automation/garbage_collect_releases.sh | 48 ++++++++++++----------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/ci-automation/garbage_collect_releases.sh b/ci-automation/garbage_collect_releases.sh index 988d572d9a..4378b3d703 100644 --- a/ci-automation/garbage_collect_releases.sh +++ b/ci-automation/garbage_collect_releases.sh @@ -43,28 +43,33 @@ function _garbage_collect_releases_impl() { source ci-automation/ci_automation_common.sh local sshcmd="$(gen_sshcmd)" - local keep_versions - mapfile -t keep_versions < <(unset POSIXLY_CORRECT; \ + local keep="$( + # For some reasons this is set to the empty string in some environments and it makes gawk + # behave like POSIX awk (i.e. no 'gensub'). 
+ unset POSIXLY_CORRECT curl -s "${RELEASES_JSON_FEED}" \ - | jq -r 'keys_unsorted | .[] | match("[0-9]+\\.[0-9]+\\.[0-9]+") | .string' \ - | sort -Vr \ - | awk -v keep="${keep_per_chan}" -v lts="${keep_lts_releases}" ' - { - version = $1 - chan_num = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", version) + 0 - major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", version) + 0 + | jq -r 'keys_unsorted | .[] | match("[0-9]+\\.[0-9]+\\.[0-9]+") | .string' \ + | sort -Vr \ + | awk -v keep="${keep_per_chan}" -v lts="${keep_lts_releases}" ' + { + version = $1 + chan_num = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", version) + 0 + major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", version) + 0 - if (chan_num <= 2) { - if (chan_count[chan_num] < keep) - print version - chan_count[chan_num] = chan_count[chan_num] + 1 - } else { - if ( (chan_count["lts"][major] < keep) \ - && (length(chan_count["lts"]) <= lts) ) - print version - chan_count["lts"][major] = chan_count["lts"][major] + 1 - } - } ') + if (chan_num <= 2) { + if (chan_count[chan_num] < keep) + print version + chan_count[chan_num] = chan_count[chan_num] + 1 + } else { + if ( (chan_count["lts"][major] < keep) \ + && (length(chan_count["lts"]) <= lts) ) + print version + chan_count["lts"][major] = chan_count["lts"][major] + 1 + } + } ' + )" + + mapfile -t keep_versions <<<"${keep}" echo echo "######## The following version(s) will be kept ########" @@ -115,8 +120,7 @@ function _garbage_collect_releases_impl() { minor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g") + 0 patch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g") + 0 - if ( ((path == "sdk/amd64") || (path == "containers")) \ - && (vmajor == major) && (vminor == 0) && (vpatch == 0) ) { + if ( (path == "sdk/amd64") && (vmajor == major) ) { print "" print "## Skipping " version " in " path " because it contains the SDK for release " $0 " in keep list." 
ret = 0 From bd3d4c19e231b6de929e4097a90ff2071adf507f Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Wed, 14 Feb 2024 14:58:02 +0100 Subject: [PATCH 5/6] garbage_collect: no quotes for subshell exec assignments Co-authored-by: Krzesimir Nowak --- ci-automation/garbage_collect_releases.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci-automation/garbage_collect_releases.sh b/ci-automation/garbage_collect_releases.sh index 4378b3d703..8aee781fc8 100644 --- a/ci-automation/garbage_collect_releases.sh +++ b/ci-automation/garbage_collect_releases.sh @@ -43,7 +43,7 @@ function _garbage_collect_releases_impl() { source ci-automation/ci_automation_common.sh local sshcmd="$(gen_sshcmd)" - local keep="$( + local keep=$( # For some reasons this is set to the empty string in some environments and it makes gawk # behave like POSIX awk (i.e. no 'gensub'). unset POSIXLY_CORRECT @@ -67,7 +67,7 @@ function _garbage_collect_releases_impl() { chan_count["lts"][major] = chan_count["lts"][major] + 1 } } ' - )" + ) mapfile -t keep_versions <<<"${keep}" From ce3509145586281fdf18c8bafcdb5ea505e16c13 Mon Sep 17 00:00:00 2001 From: Thilo Fromm Date: Wed, 14 Feb 2024 17:37:24 +0100 Subject: [PATCH 6/6] garbage_collect_releases.sh: more readable variables in AWK script Signed-off-by: Thilo Fromm --- ci-automation/garbage_collect_releases.sh | 49 +++++++++++++---------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/ci-automation/garbage_collect_releases.sh b/ci-automation/garbage_collect_releases.sh index 8aee781fc8..4ac2f628f8 100644 --- a/ci-automation/garbage_collect_releases.sh +++ b/ci-automation/garbage_collect_releases.sh @@ -98,58 +98,65 @@ function _garbage_collect_releases_impl() { "ls -1 ${BUILDCACHE_PATH_PREFIX}/${dir} | grep -E '^[0-9]+\.[0-9]+\.[0-9]+$'"); do local o_fullpath="${fullpath}/${version}" - # skip if version is marked for keeping OR if it's a new release about to be published + # Go through all deletion candidate versions 
in directory + # skip if candidate version is marked for keeping OR if it's a new release about to be published + # or if it ships an SDK used by a version in the keep list. if printf "%s\n" "${keep_versions[@]}" \ - | { unset POSIXLY_CORRECT ; awk -v version="${version}" -v path="${dir}" ' + | { unset POSIXLY_CORRECT ; awk -v candidate_version="${version}" -v path="${dir}" ' BEGIN { - vmajor = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", version) + 0 - vminor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", version) + 0 - vpatch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g", version) + 0 + # Candidate version (from build cache directory) that was passed in candidate_version variable + candidate_major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", candidate_version) + 0 + candidate_minor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", candidate_version) + 0 + candidate_patch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g", candidate_version) + 0 ret = 1 } { - if ($0 == version) { + # The whole keep versions list is piped into AWK; match the candidate (build cache) version against each entry. + + keep_list_entry = $0 + if (keep_list_entry == candidate_version) { print "" - print "## Skipping " version " because it is in the keep list." + print "## Skipping " candidate_version " because it is in the keep list." 
ret = 0 exit } - major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g") + 0 - minor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g") + 0 - patch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g") + 0 + keep_list_major = gensub("([0-9]+)\\.[0-9]+\\.[0-9]+","\\1","g", keep_list_entry) + 0 + keep_list_minor = gensub("[0-9]+\\.([0-9]+)\\.[0-9]+","\\1","g", keep_list_entry) + 0 + keep_list_patch = gensub("[0-9]+\\.[0-9]+\\.([0-9]+)","\\1","g", keep_list_entry) + 0 - if ( (path == "sdk/amd64") && (vmajor == major) ) { + if ( (path == "sdk/amd64") && (candidate_major == keep_list_major) ) { print "" - print "## Skipping " version " in " path " because it contains the SDK for release " $0 " in keep list." + print "## Skipping " candidate_version " in " path " because it contains the SDK for release " keep_list_entry " in keep list." ret = 0 exit } - if (major_alpha == "") - major_alpha = major + # keep list piped into AWK is sorted (descending), so the very first (i.e. highest) version on top is the most recent Alpha release + if (latest_alpha_major == "") + latest_alpha_major = keep_list_major - if (vmajor > major_alpha) { + if (candidate_major > latest_alpha_major) { print "" - print "## Skipping " version " because major version is higher than the latest Alpha (" major_alpha ") in keep list." + print "## Skipping " candidate_version " because major version is higher than the latest Alpha (" latest_alpha_major ") in keep list." print "(I.e. this is an unpublished new Alpha release)" ret = 0 exit } - if ((vmajor == major) && (vminor > minor)) { + if ((candidate_major == keep_list_major) && (candidate_minor > keep_list_minor)) { print "" - print "## Skipping " version " because major version is in keep list and minor version is higher than the latest release." - print "(I.e. this is an unpublished channel progression " $0 " -> " version ")" + print "## Skipping " candidate_version " because major version is in keep list and minor version is higher than the latest release." 
+ print "(I.e. this is an unpublished channel progression " keep_list_entry " -> " candidate_version ")" ret = 0 exit } - if ((vmajor == major) && (vminor == minor) && (vpatch > patch)) { + if ((candidate_major == keep_list_major) && (candidate_minor == keep_list_minor) && (candidate_patch > keep_list_patch)) { print "" - print "## Skipping " version " because major and minor versions are in keep list and patch version is higher than the latest release." - print "(I.e. this is an unpublished new patch release " $0 " -> " version ")" + print "## Skipping " candidate_version " because major and minor versions are in keep list and patch version is higher than the latest release." + print "(I.e. this is an unpublished new patch release " keep_list_entry " -> " candidate_version ")" ret = 0 exit }