Merge pull request #273 from flatcar-linux/scripts

ci-container/test: add AWS test script for CI automation
2026-01-17 06:22:18 +01:00 · 2022-05-10 18:35:12 +02:00 · 2022-05-10 18:35:12 +02:00 · a954d4f164
commit a954d4f164
parent b2f63c8e17 47eb233fc4
4 changed files with 382 additions and 1 deletions
--- a/ci-automation/ci-config.env
+++ b/ci-automation/ci-config.env
@ -102,3 +102,20 @@ DIGITALOCEAN_PARALLEL="${PARALLEL_TESTS:-8}"
 VMWARE_ESX_PARALLEL="${PARALLEL_TESTS:-4}"
 # VMWARE_ESX_CREDS should come from sdk_container/.env and must be
 # base64-encoded.
+
+# -- AWS --
+
+# The ": ${VAR:=default}" settings below keep a value that is already
+# set (and non-empty) in the environment and fall back to the given
+# default otherwise. The expansions are quoted, so a preset value is
+# never word-split or glob-expanded by the no-op ":" command.
+
+# Main instance type for amd64 - all the selected tests run there.
+: "${AWS_amd64_INSTANCE_TYPE:=t3.small}"
+# Space-separated list of instance types. On those instances the
+# cl.internet kola test will be run if this test is selected to run.
+: "${AWS_amd64_MORE_INSTANCE_TYPES:=m4.2xlarge}"
+# Main instance type for arm64 - all the selected tests run there.
+: "${AWS_arm64_INSTANCE_TYPE:=a1.large}"
+# Space-separated list of instance types. On those instances the
+# cl.internet kola test will be run if this test is selected to run.
+: "${AWS_arm64_MORE_INSTANCE_TYPES:=}"
+: "${AWS_IAM_PROFILE:=ciauto-test}"
+: "${AWS_REGION:=us-east-1}"
+# AMI to test. If left empty, the AWS test script uploads the image
+# and creates a temporary AMI on its own.
+: "${AWS_AMI_ID:=}"
+AWS_PARALLEL="${PARALLEL_TESTS:-8}"
+# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY should come from
+# sdk_container/.env
--- a/ci-automation/test.sh
+++ b/ci-automation/test.sh
@ -146,6 +146,9 @@ function test_run() {
    local success=false
    # A job on each worker prunes old mantle images (docker image prune)
    echo "docker rm -f '${container_name}'" >> ./ci-cleanup.sh
+
+    # Vendor tests may need to know if it is a first run or a rerun
+    touch "${work_dir}/first_run"
    for retry in $(seq "${retries}"); do
        local tapfile="results-run-${retry}.tap"
        local failfile="failed-run-${retry}.txt"
@ -162,8 +165,9 @@ function test_run() {
                \"${arch}\" \
                \"${vernum}\" \
                \"${tapfile}\" \
-                $@"
+                $*"
        set -e
+        rm -f "${work_dir}/first_run"

        docker run --pull always --rm --name="${container_name}" --privileged --net host -v /dev:/dev \
          -w /work -v "$PWD":/work "${mantle_ref}" \
--- a/ci-automation/vendor-testing/aws.sh
+++ b/ci-automation/vendor-testing/aws.sh
@ -0,0 +1,82 @@
+#!/bin/bash
+# Copyright (c) 2022 The Flatcar Maintainers.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+set -euo pipefail
+
+# Test execution script for the AWS vendor image.
+# This script is supposed to run in the mantle container.
+
+source ci-automation/vendor_test.sh
+
+# Export the credentials before the first ore call below, so child
+# processes can see them.
+# NOTE(review): assumes ore takes the credentials from the environment
+# - confirm.
+export AWS_ACCESS_KEY_ID
+export AWS_SECRET_ACCESS_KEY
+
+board="${CIA_ARCH}-usr"
+# "+" is replaced with "-" to get a version string that is used in
+# the image name and in the S3 path.
+escaped_vernum="${CIA_VERNUM//+/-}"
+image_name="ci-${escaped_vernum}-${CIA_ARCH}"
+# The instance types are configured per architecture, so look them up
+# through indirect expansion of AWS_<arch>_INSTANCE_TYPE and
+# AWS_<arch>_MORE_INSTANCE_TYPES.
+aws_instance_type_var="AWS_${CIA_ARCH}_INSTANCE_TYPE"
+aws_instance_type="${!aws_instance_type_var}"
+more_aws_instance_types_var="AWS_${CIA_ARCH}_MORE_INSTANCE_TYPES"
+set -o noglob # there shouldn't be any instance types with asterisks
+              # in it, but…
+more_aws_instance_types=( ${!more_aws_instance_types_var} )
+set +o noglob
+
+vmdk='flatcar_production_ami_vmdk_image.vmdk'
+tarball="${vmdk}.bz2"
+
+# Without a preset AMI, create a temporary one from the image of the
+# tested version. It is deleted again when this script exits.
+if [[ -z "${AWS_AMI_ID}" ]]; then
+    if [[ -f "${vmdk}" ]]; then
+        echo "++++ ${CIA_TESTSCRIPT}: using existing ${vmdk} for ${CIA_VERNUM} (${CIA_ARCH}) ++++"
+    else
+        echo "++++ ${CIA_TESTSCRIPT}: downloading ${tarball} for ${CIA_VERNUM} (${CIA_ARCH}) ++++"
+        copy_from_buildcache "images/${CIA_ARCH}/${CIA_VERNUM}/${tarball}" .
+        lbunzip2 "${tarball}"
+    fi
+
+    aws_bucket="flatcar-kola-ami-import-${AWS_REGION}"
+    aws_s3_path="s3://${aws_bucket}/${escaped_vernum}/${board}/"
+    # The cleanup is registered before anything is created, so it also
+    # runs if the upload fails halfway through.
+    trap 'ore -d aws delete --region="${AWS_REGION}" --board="${board}" --name="${image_name}" --ami-name="${image_name}" --file="${vmdk}" --bucket "${aws_s3_path}"' EXIT
+    ore aws initialize --region="${AWS_REGION}" --bucket "${aws_bucket}"
+    AWS_AMI_ID=$(ore aws upload --force --region="${AWS_REGION}" --board="${board}" --name="${image_name}" --ami-name="${image_name}" --ami-description="Flatcar Test ${image_name}" --file="${vmdk}" --bucket "${aws_s3_path}" | jq -r .HVM)
+    # jq prints "null" if the upload output has no HVM field. Fail
+    # here instead of starting the tests with an unusable AMI ID.
+    if [[ -z "${AWS_AMI_ID}" || "${AWS_AMI_ID}" == 'null' ]]; then
+        echo "++++ ${CIA_TESTSCRIPT}: ore aws upload did not report an HVM AMI ID ++++" >&2
+        exit 1
+    fi
+    echo "++++ ${CIA_TESTSCRIPT}: created new AMI ${AWS_AMI_ID} (will be removed after testing) ++++"
+fi
+
+# Runs the given kola tests on an AWS instance of the given type.
+#
+# Parameters:
+# 1 - instance type
+# 2 - tap file for the results
+# @ - passed to kola run as they are (tests to run)
+run_kola_tests() {
+    local instance_type="${1}" instance_tapfile="${2}"
+    shift 2
+
+    local -a kola_opts=(
+        --board="${board}"
+        --basename="${image_name}"
+        --channel="${CIA_CHANNEL}"
+        --offering='basic'
+        --parallel="${AWS_PARALLEL}"
+        --platform=aws
+        --aws-ami="${AWS_AMI_ID}"
+        --aws-region="${AWS_REGION}"
+        --aws-type="${instance_type}"
+        --aws-iam-profile="${AWS_IAM_PROFILE}"
+        --tapfile="${instance_tapfile}"
+        --torcx-manifest="${CIA_TORCX_MANIFEST}"
+    )
+
+    # timeout sends SIGQUIT to kola if it is still running after six
+    # hours.
+    timeout --signal=SIGQUIT 6h kola run "${kola_opts[@]}" "${@}"
+}
+
+# Lists the AWS kola tests matching the given names or patterns.
+#
+# Parameters:
+# 1 - instance type (ignored)
+# @ - passed to the --filter option of kola list
+query_kola_tests() {
+    local -a test_filters=( "${@:2}" )
+    kola list --platform=aws --filter "${test_filters[@]}"
+}
+
+# The credentials are expected to be already set at this point
+# (ci-config.env names sdk_container/.env as their source). Export
+# them for the child processes.
+export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
+
+# All the requested tests run on the main instance type. The other
+# instance types only run cl.internet, and only if it is among the
+# tests selected to run.
+kola_run_args=(
+    "${aws_instance_type}"
+    "${CIA_TAPFILE}"
+    "${CIA_FIRST_RUN}"
+    "${more_aws_instance_types[@]}"
+    '--'
+    'cl.internet'
+    '--'
+    "${@}"
+)
+run_kola_tests_on_instances "${kola_run_args[@]}"
--- a/ci-automation/vendor_test.sh
+++ b/ci-automation/vendor_test.sh
@ -51,6 +51,9 @@
 #   Path to the Torcx manifest. Usually passed to kola through the
 #   --torcx-manifest parameter.
 #
+# CIA_FIRST_RUN:
+#   1 if this is a first run, 0 if it is a rerun of failed tests.
+#
 #
 # After this script is sourced, the parameters in ${@} specify test
 # cases / test case patterns to run.
@ -90,6 +93,11 @@ fi
 ciavts_tapfile="${PWD}/${ciavts_work_dir}/${ciavts_tapfile}"
 ciavts_torcx_manifest="${PWD}/${ciavts_main_work_dir}/torcx_manifest.json"

+ciavts_first_run=0
+if [[ -f "${ciavts_main_work_dir}/first_run" ]]; then
+    ciavts_first_run=1
+fi
+
 echo "++++ Running ${ciavts_testscript} inside ${ciavts_work_dir} ++++"

 cd "${ciavts_work_dir}"
@ -102,6 +110,276 @@ CIA_TESTSCRIPT="${ciavts_testscript}"
 CIA_GIT_VERSION="${ciavts_git_version}"
 CIA_BUILD_TYPE="${ciavts_type}"
 CIA_TORCX_MANIFEST="${ciavts_torcx_manifest}"
+CIA_FIRST_RUN="${ciavts_first_run}"

 # Unset all variables with ciavts_ prefix now.
 unset -v "${!ciavts_@}"
+
+# Prefixes all test names in the tap file with a given prefix, so the
+# test name like "cl.basic" will become "extra-test.[${prefix}].cl.basic".
+# The prefix is inserted literally, it may contain characters that are
+# special to sed (slash, ampersand, backslash).
+#
+# Typical use:
+#   prefix_tap_file "${instance_type}" "${tapfile}"
+#
+# Parameters:
+# 1 - prefix
+# 2 - tap file, modified in place
+function prefix_tap_file() {
+    local prefix="${1}"; shift
+    local tap_file="${1}"; shift
+    local actual_prefix="extra-test.[${prefix}]."
+    # The prefix lands in the replacement part of the sed expression
+    # below, so put a backslash before every backslash, slash (the
+    # delimiter used below) and ampersand in it.
+    local escaped_prefix
+    escaped_prefix="$(sed --expression 's|[\\/&]|\\&|g' <<<"${actual_prefix}")"
+
+    # \1 is everything up to and including the dash that follows
+    # "ok" or "not ok", \3 is the first character of the test name.
+    sed --in-place --expression 's/^\(\s*\(not\)\?\s*ok[^-]*\s*-\s*\)\(\S\)/\1'"${escaped_prefix}"'\3/g' "${tap_file}"
+}
+
+# Filters the test names, so it puts only the real names of the
+# prefixed tests into the chosen variable. For example for prefix
+# "foo", it will ignore the test name like "cl.basic", but will store
+# "cl.internet" for a test name like "extra-test.[foo].cl.internet".
+# "*" is treated specially - it will be inserted into the chosen
+# variable if it is passed.
+#
+# The prefix is compared literally, glob characters in it have no
+# special meaning. The results are appended to the chosen variable, so
+# pass an empty array if only the results of this call are wanted.
+#
+# Typical use:
+#   local -a tests_to_run=()
+#   filter_prefixed_tests tests_to_run "${instance_type}" "${@}"
+#   if [[ "${#tests_to_run[@]}" -gt 0 ]]; then …; fi
+#
+# Parameters:
+# 1 - name of an array variable where the filtering results will be stored
+# 2 - prefix
+# @ - test names
+function filter_prefixed_tests() {
+    local var_name="${1}"; shift
+    local prefix="${1}"; shift
+    # rest of the parameters are test names
+    local -n results="${var_name}"
+    local full_prefix="extra-test.[${prefix}]."
+    local name
+    for name; do
+        # The quotes around full_prefix make bash treat it as a plain
+        # string, not as a pattern.
+        if [[ "${name}" == "${full_prefix}"* ]]; then
+            results+=( "${name#"${full_prefix}"}" )
+        elif [[ "${name}" = '*' ]]; then
+            results+=( '*' )
+        fi
+    done
+}
+
+# Drops the extra tests from the passed test names, that is the names
+# beginning with "extra-test.". All the other names are appended to
+# the chosen variable.
+#
+# Typical use:
+#   filter_out_prefixed_tests tests_to_run "${@}"
+#   if [[ "${#tests_to_run[@]}" -gt 0 ]]; then …; fi
+#
+# Parameters:
+# 1 - name of an array variable where the filtering results will be stored
+# @ - test names
+function filter_out_prefixed_tests() {
+    local var_name="${1}"; shift
+    local -n results="${var_name}"
+    local name
+    for name in "${@}"; do
+        case "${name}" in
+            extra-test.*)
+                # an extra test, not for the main instance
+                ;;
+            *)
+                results+=( "${name}" )
+                ;;
+        esac
+    done
+}
+
+# Merges into the first (main) tap file the contents of other tap
+# files. It is very simple - the function assumes that all the tap
+# files begin with a line like:
+#
+# 1..${number_of_tests}
+#
+# Other lines that are processed should begin like:
+#
+# (not)? ok - ${test_name}
+#
+# Any other lines are copied verbatim.
+#
+# Tap files that do not exist are skipped, and this includes the main
+# tap file - it is created if it is missing. A tap file with a first
+# line that does not end with a number is counted as having zero
+# tests.
+#
+# The other tap files should already be preprocessed by
+# prefix_tap_file to avoid duplicated test names.
+#
+# Typical use:
+#   merge_tap_files "${tap_file}" extra-validation-*.tap
+#   rm -f extra-validation-*.tap
+#
+# Parameters:
+# 1 - main tap file
+# @ - other tap files
+function merge_tap_files() {
+    local main_tap_file="${1}"; shift
+    # rest of the parameters are other tap files
+
+    local tmp_tap_file="${main_tap_file}.mtf.tmp"
+    local total_test_count=0
+    local test_count
+    local tap_file
+
+    # First pass: sum up the test counts of all the tap files.
+    for tap_file in "${main_tap_file}" "${@}"; do
+        if [[ ! -f "${tap_file}" ]]; then
+            continue
+        fi
+        # The fallback keeps a failing grep from ending the script
+        # when errexit and pipefail are in effect.
+        test_count=$(head --lines=1 "${tap_file}" | grep --only-matching '[0-9]\+$' || echo 0)
+        # Not ((total_test_count+=test_count)) - its exit status is 1
+        # when the sum is zero, which ends the script under errexit.
+        total_test_count=$((total_test_count + test_count))
+    done
+
+    # Second pass: the new first line, then everything but the first
+    # line of each tap file, the main one going first.
+    echo "1..${total_test_count}" >"${tmp_tap_file}"
+    for tap_file in "${main_tap_file}" "${@}"; do
+        if [[ ! -f "${tap_file}" ]]; then
+            continue
+        fi
+        tail --lines=+2 "${tap_file}" >>"${tmp_tap_file}"
+    done
+    mv --force "${tmp_tap_file}" "${main_tap_file}"
+}
+
+# Runs or reruns the tests on the main instance and other
+# instances. Other instances usually run a subset of tests only.
+#
+# The tests on the other instances run in the background, in parallel
+# to the tests on the main instance. Their output is printed when they
+# finish, every line prefixed with the instance type. Their tap files
+# are merged into the main tap file at the end. A failing kola run
+# does not make this function fail - the results are to be read from
+# the main tap file.
+#
+# For this function to work, the caller needs to define two functions
+# beforehand:
+#
+# run_kola_tests that takes the following parameters:
+# 1 - instance type
+# 2 - tap file
+# @ - tests to run
+#
+# query_kola_tests that takes the following parameters:
+# 1 - instance type
+# @ - tests to run
+# This function should print the names of the tests to run. Every line
+# of the output should have one test name to run. Any other cruft in
+# the line will be ignored.
+#
+# Typical use:
+# function run_kola_tests() {
+#     local instance_type="${1}"; shift
+#     local tap_file="${1}"; shift
+#     kola run … "${@}"
+# }
+#
+# function query_kola_tests() {
+#     local instance_type="${1}"; shift
+#     kola list … "${@}"
+# }
+#
+# args=(
+#     "${main_instance}"
+#     "${CIA_TAPFILE}"
+#     "${CIA_FIRST_RUN}"
+#     "${other_instance_types[@]}"
+#     '--'
+#     'cl.internet'
+#     '--'
+#     "${tests_to_run[@]}"
+# )
+# run_kola_tests_on_instances "${args[@]}"
+#
+# Parameters:
+# 1 - main instance type - there all the tests are being run
+# 2 - main tap file
+# 3 - if this is first run (1 if it is, 0 if it is a rerun)
+# @ - other instance types followed by double dash (--) followed by
+#     test names for other instances to filter from the tests to be
+#     run followed by double dash, followed by tests to be run or
+#     rerun
+function run_kola_tests_on_instances() {
+    local main_instance_type="${1}"; shift
+    local main_tapfile="${1}"; shift
+    local is_first_run="${1}"; shift
+    local other_instance_types=()
+    local other_tests=()
+    local arg
+
+    # everything up to the first double dash is an instance type
+    while [[ "${#}" -gt 0 ]]; do
+        arg="${1}"; shift
+        if [[ "${arg}" = '--' ]]; then
+            break
+        fi
+        other_instance_types+=( "${arg}" )
+    done
+
+    # everything up to the second double dash is a test name for the
+    # other instances
+    while [[ "${#}" -gt 0 ]]; do
+        arg="${1}"; shift
+        if [[ "${arg}" = '--' ]]; then
+            break
+        fi
+        other_tests+=( "${arg}" )
+    done
+
+    # rest of the parameters are tests to be run or rerun
+
+    local instance_type
+    local queried_tests
+    local instance_tests=()
+    local tests_on_instances_running=0
+    # one test name per line, to be used as a list of fixed-string
+    # patterns for grep
+    local other_tests_for_fgrep
+    other_tests_for_fgrep="$(printf '%s\n' "${other_tests[@]}")"
+
+    for instance_type in "${other_instance_types[@]}"; do
+        # Start every instance type with an empty list, because
+        # filter_prefixed_tests only appends to it. Without the reset,
+        # the tests that failed on one instance type would be rerun on
+        # all the following instance types too.
+        instance_tests=()
+        # On first run we usually pass the canonical test names like
+        # cl.basic, cl.internet or *, so we decide which tests should
+        # be run on the other instances based on this list. On the
+        # other hand, the rerun will contain names of the failed tests
+        # only, and those are specific - if a test failed on the main
+        # instance, the name of the test will be like cl.basic; if a
+        # test failed on other instance, the name of the test will be
+        # like extra-test.[…].cl.basic. So in case of reruns, we want
+        # to filter the extra tests first then we decide which tests
+        # should be run.
+        if [[ "${is_first_run}" -eq 1 ]]; then
+            set -o noglob # noglob should not be necessary, as
+                          # query_kola_tests shouldn't return a
+                          # wildcard, but better to be safe than sorry
+            queried_tests="$(query_kola_tests "${instance_type}" "${@}")"
+            instance_tests=( $(grep --only-matching --fixed-strings "${other_tests_for_fgrep}" <<<"${queried_tests}" || :) )
+            set +o noglob
+        else
+            filter_prefixed_tests instance_tests "${instance_type}" "${@}"
+        fi
+        if [[ "${#instance_tests[@]}" -gt 0 ]]; then
+            tests_on_instances_running=1
+            # run in a background subshell, the output is collected
+            # and printed in one go at the end, so the outputs of
+            # parallel runs do not get interleaved line by line
+            (
+                local instance_tapfile="instance_${instance_type}_validate.tap"
+                set +e
+                set -x
+                local output
+                output=$(run_kola_tests "${instance_type}" "${instance_tapfile}" "${instance_tests[@]}" 2>&1)
+                set +x
+                set -e
+                local escaped_instance_type
+                escaped_instance_type="$(sed -e 's/[\/&]/\\&/g' <<<"${instance_type}")"
+                # the instance type is passed as a parameter, not as
+                # a part of the format string, so a percent sign in
+                # it is printed as it is
+                printf '=== START %s ===\n%s\n=== END %s ===\n' \
+                    "${instance_type}" \
+                    "$(sed -e "s/^/${escaped_instance_type}: /g" <<<"${output}")" \
+                    "${instance_type}"
+                prefix_tap_file "${instance_type}" "${instance_tapfile}"
+            ) &
+        fi
+    done
+
+    local -a main_tests
+
+    filter_out_prefixed_tests main_tests "${@}"
+    if [[ "${#main_tests[@]}" -gt 0 ]]; then
+        # run in a subshell, so the set -x and set +e do not pollute
+        # the outer environment
+        (
+            set +e
+            set -x
+            run_kola_tests "${main_instance_type}" "${main_tapfile}" "${main_tests[@]}"
+            true
+        )
+    fi
+
+    if [[ "${tests_on_instances_running}" -eq 1 ]]; then
+        # wait for all the background runs before touching their tap
+        # files
+        wait
+        merge_tap_files "${main_tapfile}" 'instance_'*'_validate.tap'
+        rm -f 'instance_'*'_validate.tap'
+    fi
+}