ci-automation: first stab at adding testing

Signed-off-by: Thilo Fromm <thilo@kinvolk.io>
This commit is contained in:
Thilo Fromm 2022-02-16 19:17:42 +01:00
parent 221351927e
commit f6f44e2ca8
5 changed files with 465 additions and 0 deletions

View File

@ -22,3 +22,7 @@ CI_GIT_EMAIL="infra+ci@flatcar-linux.org"
# build artifacts go here (in container)
CONTAINER_TORCX_ROOT="/home/sdk/build/torcx"
CONTAINER_IMAGE_ROOT="/home/sdk/build/images"
# Image / vendor tests settings
# File name of the OS image the qemu vendor test fetches from the build cache
# and hands to kola via --qemu-image.
QEMU_IMAGE_NAME="flatcar_production_image.bin"
# Value the qemu vendor test passes to kola's --parallel option.
QEMU_PARALLEL=4

View File

@ -0,0 +1,281 @@
#!/bin/bash
#
# Copyright (c) 2021 The Flatcar Maintainers.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Helper script for extracting information from TAP files and for merging multiple
# TAP files into one report.
# The script uses a temporary SQLite DB for querying and for result generation.
#
# Brief usage overview (scroll down for parameters etc.):
# tap_ingest_tapfile - add test results from tap file to the DB
# tap_list_vendors - list all vendors TAP files have been ingested for
# tap_failed_tests_for_vendor - list all tests that never succeeded even once, per vendor
# tap_generate_report - generate a merged test report
# File name of the temporary SQLite results DB. The helper functions in this
# file pass it to sqlite3 as-is, i.e. as a path relative to the current
# working directory.
TAPFILE_HELPER_DBNAME="results.sqlite3"
# Wrapper around sqlite3 w/ retries if DB is locked.
# INPUT:
#   1:    <dbfile> - first argument handed to sqlite3 (callers pass the DB file,
#                    or an sqlite3 option followed by the DB file)
#   2..n: <args>   - remaining arguments, passed to sqlite3 unchanged
# RETURNS:
#   The exit status of the last sqlite3 invocation. Exit status 5 is treated
#   as "DB is locked" and triggers a retry after a random 1-5 second pause.
function __sqlite3_wrapper() {
    local dbfile="$1"
    shift

    local rc=0
    local delay=""

    while true; do
        # Capture the status right away: any later command (including the
        # test below) overwrites $?. The '||' also keeps a caller's 'set -e'
        # from aborting before we get a chance to retry.
        rc=0
        sqlite3 "${dbfile}" "$@" || rc=$?
        if [ "${rc}" -ne 5 ] ; then
            return "${rc}"
        fi
        delay="$((1 + RANDOM % 5))"
        echo "Retrying in ${delay} seconds." >&2
        sleep "${delay}"
    done
}
# --
# Create the results DB schema unless it is already present.
# Three tables are set up in the DB named by TAPFILE_HELPER_DBNAME:
#   test_case - one row per test name
#   vendor    - one row per vendor name
#   test_run  - one row per (test case, run, vendor) holding result and output
function __db_init() {
    # All statements use IF NOT EXISTS, so running this more than once is harmless.
    local schema='
CREATE TABLE IF NOT EXISTS "test_case" (
"id" INTEGER,
"name" TEXT UNIQUE,
PRIMARY KEY("id")
);
CREATE TABLE IF NOT EXISTS "vendor" (
"id" INTEGER,
"name" TEXT UNIQUE,
PRIMARY KEY("id")
);
CREATE TABLE IF NOT EXISTS "test_run" (
"id" INTEGER NOT NULL,
"result" INTEGER NOT NULL,
"output" TEXT,
"case_id" INTEGER NOT NULL,
"run" INTEGER NOT NULL,
"vendor_id" INTEGER,
PRIMARY KEY("id"),
FOREIGN KEY("case_id") REFERENCES "test_case"("id"),
FOREIGN KEY("vendor_id") REFERENCES "vendor"("id"),
UNIQUE (case_id, run, vendor_id)
);
'

    __sqlite3_wrapper "${TAPFILE_HELPER_DBNAME}" "${schema}"
}
# --
# Read tapfile into temporary DB.
# The DB is created first if the DB file does not exist yet.
# INPUT:
# 1: <tapfile> - tapfile to ingest
# 2: <vendor> - vendor (qemu, azure, aws, etc...)
# 3: <run> - re-run iteration
# RETURNS:
#   The exit status of the sqlite3 wrapper call that executes the INSERTs.
function tap_ingest_tapfile() {
    local tapfile="${1}"
    local vendor="${2}"
    local run="${3}"

    local dbname="${TAPFILE_HELPER_DBNAME}"
    local result=""
    local result_string=""
    local test_name=""
    local error_message=""
    local in_error_message=false
    local line=""

    if ! [ -f "${dbname}" ] ; then
        __db_init
    fi

    # Values end up inside single-quoted SQL string literals; double any single
    # quote so a stray quote can neither break nor alter the statement.
    local q="'"
    local sql_vendor="${vendor//${q}/${q}${q}}"
    local sql_run="${run//${q}/${q}${q}}"
    local sql_test_name=""

    # Wrap all SQL commands in a transaction to speed up INSERTs
    local sql="BEGIN TRANSACTION;"

    # Example TAP input:
    # ok - coreos.auth.verify
    # ok - coreos.locksmith.tls
    # not ok - cl.filesystem
    #   ---
    #   Error: "--- FAIL: cl.filesystem/deadlinks (1.86s)\n files.go:90: Dead symbolic links found: [...]"
    #   ...
    # ok - cl.cloudinit.script
    # ok - kubeadm.v1.22.0.flannel.base

    # '|| [ -n ... ]' makes sure a last line without trailing newline is processed, too.
    while read -r line || [ -n "${line}" ] ; do
        if [[ "${line}" == "1.."* ]] ; then continue; fi
        if [ "${line}" = "---" ] ; then # note: read removes leading whitespaces
            in_error_message=true
            continue
        fi

        if $in_error_message ; then
            if [ "${line}" = "..." ] ; then
                # End of the diagnostics block: fall through to the INSERT below,
                # which replaces the row written for the "not ok" line with one
                # that carries the collected error message.
                in_error_message=false
            else
                # 'echo -e' expands the literal "\n" sequences of the message into
                # line breaks; quotes and '>' are replaced so the text is safe in SQL.
                error_message="$(echo -e "$line" \
                    | sed -e 's/^Error: "--- FAIL: /"/' -e 's/^[[:space:]]*//' \
                    | sed -e "s/[>\"']/_/g" -e 's/[[:space:]]/ /g')"
                continue
            fi
        else
            # Blank lines are not test results; do not record an unnamed failed test.
            if [ -z "${line}" ] ; then continue; fi

            test_name="$(printf '%s\n' "${line}" | sed 's/^[^-]* - //')"
            result_string="$(printf '%s\n' "${line}" | sed 's/ - .*//')"
            result=0
            if [ "${result_string}" = "ok" ] ; then
                result=1
            fi
        fi

        sql_test_name="${test_name//${q}/${q}${q}}"
        sql="${sql}INSERT OR IGNORE INTO test_case(name) VALUES ('${sql_test_name}');"
        sql="${sql}INSERT OR IGNORE INTO vendor(name) VALUES ('${sql_vendor}');"
        sql="${sql}INSERT OR REPLACE INTO test_run(run,result,output,case_id,vendor_id)
            VALUES ('${sql_run}','${result}', '${error_message}',
                (SELECT id FROM test_case WHERE name='${sql_test_name}'),
                (SELECT id FROM vendor WHERE name='${sql_vendor}'));"
        error_message=""
    done < "$tapfile"

    sql="${sql}COMMIT;"

    __sqlite3_wrapper "${dbname}" "${sql}"
}
# --
# Print the names of all vendors recorded in the results DB so far.
function tap_list_vendors() {
    __sqlite3_wrapper "${TAPFILE_HELPER_DBNAME}" 'SELECT DISTINCT name from vendor;'
}
# --
# Print the names of all tests which ran for a vendor but have no successful
# run recorded for it.
# INPUT:
# 1: <vendor> - Vendor name to check for failed test runs
function tap_failed_tests_for_vendor() {
    local vendor="$1"

    # First sub-select: the test has at least one run for this vendor.
    # Second sub-select: none of those runs has result=1 (success).
    local query="
SELECT failed.name FROM test_case AS failed
WHERE EXISTS (
SELECT * FROM test_run AS t, vendor AS v, test_case AS c
WHERE t.vendor_id=v.id AND t.case_id=c.id
AND v.name='${vendor}'
AND c.name=failed.name
)
AND NOT exists (
SELECT * FROM test_run AS t, vendor AS v, test_case AS c
WHERE t.vendor_id=v.id AND t.case_id=c.id
AND v.name='${vendor}'
AND c.name=failed.name
AND t.result=1 );"

    __sqlite3_wrapper "${TAPFILE_HELPER_DBNAME}" "${query}"
}
# --
# Print the tap file from contents of the database.
# The report starts with a plan line and a pseudo test carrying version,
# architecture and the list of vendors, followed by one result line plus a
# diagnostics block per test case.
# INPUT:
# 1: <arch> - Architecture to be included in the first line of the report
# 2: <version> - OS version tested, to be included in the first line of the report
# 3: <include_transient_errors> - If set to "true" then debug output of transient test failures
#                                 is included in the result report.
function tap_generate_report() {
    local arch="$1"
    local version="$2"
    local full_error_report="${3:-false}"

    local dbname="${TAPFILE_HELPER_DBNAME}"
    local test_name=""

    local count=""
    count="$(__sqlite3_wrapper "${dbname}" 'SELECT count(name) FROM test_case;')"
    local vendors=""
    vendors="$(__sqlite3_wrapper "${dbname}" 'SELECT name FROM vendor;' | tr '\n' ' ')"

    # +1: the version / architecture line below counts as a test of its own.
    echo "1..$((count+1))"
    echo "ok - Version: ${version}, Architecture: ${arch}"
    echo " ---"
    echo " Platforms tested: ${vendors}"
    echo " ..."

    # Print result line for every test, including platforms it succeeded on
    # and transient failed runs.
    __sqlite3_wrapper "${dbname}" 'SELECT DISTINCT name from test_case;' | \
    while read -r test_name; do

        # "ok" if the test succeeded at least once for all vendors that run the test,
        # "not ok" otherwise.
        # The query lists vendors which ran the test but never succeeded.
        local verdict=""
        verdict="$(__sqlite3_wrapper "${dbname}" "
            SELECT failed.name FROM vendor AS failed
            WHERE EXISTS (
                SELECT * FROM test_run AS t, vendor AS v, test_case AS c
                    WHERE t.vendor_id=v.id AND t.case_id=c.id
                    AND v.name=failed.name
                    AND c.name='${test_name}'
                )
            AND NOT exists (
                SELECT * FROM test_run AS t, vendor AS v, test_case AS c
                    WHERE t.vendor_id=v.id AND t.case_id=c.id
                    AND v.name=failed.name
                    AND c.name='${test_name}'
                    AND t.result=1 );
            ")"
        if [ -n "${verdict}" ] ; then
            verdict="not ok"
        else
            verdict="ok"
        fi

        # Generate a list of vendors and respective runs, in a single line,
        # e.g. "aws (1, 2); qemu (1)".
        # INPUT:
        # 1: <result> - 1 to list successful runs, 0 to list failed runs
        function list_runs() {
            local res="$1"
            __sqlite3_wrapper -csv "${dbname}" "
                SELECT v.name, t.run FROM test_run AS t, vendor AS v, test_case AS c
                    WHERE t.vendor_id=v.id AND t.case_id=c.id
                    AND c.name='${test_name}'
                    AND t.result=${res}
                ORDER BY v.name;" \
            | awk -F, '{
                    # vendor changed: flush the run list of the previous vendor
                    if (t && (t != $1)) {
                        printf "%s %s); ", t, r
                        r = ""
                    }
                    t = $1
                    if (r) {
                        r = r ", " $2
                    } else {
                        r = "(" $2
                    }
                }
                END { if (t) print t " " r ")" }'
        }

        local succeded=""
        succeded="$(list_runs 1)"
        local failed=""
        failed="$(list_runs 0)"

        echo "${verdict} - ${test_name}"
        echo " ---"
        if [ -n "${succeded}" ] ; then
            echo " Succeeded: ${succeded}"
        fi
        if [ -n "${failed}" ] ; then
            echo " Failed: ${failed}"
            if [[ "${verdict}" == "not ok" || "${full_error_report}" == "true" ]] ; then
                # generate diagnostic output, per failed run.
                __sqlite3_wrapper -csv "${dbname}" "
                    SELECT v.name, t.run
                        FROM test_run AS t, vendor AS v, test_case AS c
                        WHERE t.vendor_id=v.id AND t.case_id=c.id
                        AND c.name='${test_name}'
                        AND t.result=0
                    ORDER BY t.run DESC;" | \
                sed 's/,/ /' | \
                while read -r vendor run; do
                    echo " Error messages for ${vendor}, run ${run}:"
                    # Restrict to this vendor: run numbers are per vendor, so the
                    # same run number may exist for other vendors as well.
                    __sqlite3_wrapper -csv "${dbname}" "
                        SELECT t.output FROM test_run AS t, vendor AS v, test_case AS c
                            WHERE t.vendor_id=v.id AND t.case_id=c.id
                            AND v.name='${vendor}'
                            AND c.name='${test_name}'
                            AND t.run='${run}';" | \
                    sed 's/"/ /' | \
                    awk '{print " LINE " NR":" $0}'
                done
            fi
        fi
        echo " ..."
    done
}
# --

122
ci-automation/test.sh Normal file
View File

@ -0,0 +1,122 @@
#!/bin/bash
#
# Copyright (c) 2021 The Flatcar Maintainers.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# >>> This file is supposed to be SOURCED from the repository ROOT. <<<
#
# test_run() should be called w/ the positional INPUT parameters below.
# Test scenarios runner stub.
# This script will run test scenarios for a single image type.
# Tests will be started inside a container based on the packages container image
# (which contains the torcx manifest).
# This script is generic and will use a vendor-specific test runner from
# "ci-automation/vendor-testing/<image>.sh".
#
# PREREQUISITES:
#
# 1. SDK version and OS image version are recorded in sdk_container/.repo/manifests/version.txt
# 2. Scripts repo version tag of OS image version to be built is available and checked out.
# 3. Flatcar packages container is available via build cache server
# from "/containers/[VERSION]/flatcar-packages-[ARCH]-[FLATCAR_VERSION].tar.gz"
# or present locally. Container must contain binary packages and torcx artefacts.
# 4. Vendor image(s) to run tests for are available on buildcache ( images/[ARCH]/[FLATCAR_VERSION]/ )
#
# INPUT:
#
# 1. Architecture (ARCH) of the TARGET vm images ("arm64", "amd64").
# 2. Image type to be tested. One of:
# ami, azure, azure_pro, digitalocean, gce, gce_pro, packet, qemu, qemu_uefi, vmware
#
# OPTIONAL INPUT:
#
# 3. List of tests / test patterns. Defaults to "*" (all tests).
# All positional arguments after the first 2 (see above) are tests / patterns of tests to run.
#
# MAX_RETRIES. Environment variable. Number of re-runs to overcome transient failures. Defaults to 999.
#
# OUTPUT:
#
# 1. 2 merged TAP reports with all test runs / vendors.
# - a "summary" report which contains error messages only for tests which never succeeded (per vendor).
# - a "detailed" report which also contains error messages of transient failures which succeeded after re-runs.
# These reports will be updated after each (re-)run of each vendor, making the test job safe
# to abort at any point - the previous runs' results won't be lost.
# 2. "./ci-cleanup.sh" with commands to clean up temporary build resources,
# to be run after this step finishes / when this step is aborted.
set -eu
# Run the test scenarios for one image type, re-running failed tests.
# Must be called from the repository root (all paths below are relative).
# INPUT:
#   1:    <arch>  - architecture of the target VM images
#   2:    <image> - image type; selects ci-automation/vendor-testing/<image>.sh
#   3..n: <tests> - tests / test patterns to run; defaults to "*" (all tests)
# ENVIRONMENT:
#   MAX_RETRIES - maximum number of (re-)runs; defaults to 999
# Each iteration runs the vendor test script in the SDK container, then
# ci-automation/test_update_reruns.sh, which writes the list of tests to
# re-run to __TESTS__/<image>/failed-run-<n>.txt.
# NOTE: exhausting all re-runs is only reported on stdout, not through the
# exit status.
function test_run() {
    local arch="$1" ; shift
    # The shift above moved the image type to $1.
    local image="$1"; shift
    local retries="${MAX_RETRIES:-999}"

    # default to all tests
    if [ $# -le 0 ] ; then
        # Quoted on purpose: pass the literal pattern on, do not expand it to
        # the files in the current directory.
        set -- '*'
    fi

    source ci-automation/tapfile_helper_lib.sh
    source ci-automation/ci_automation_common.sh
    init_submodules

    source sdk_container/.repo/manifests/version.txt
    local vernum="${FLATCAR_VERSION}"
    local docker_vernum
    docker_vernum="$(vernum_to_docker_image_version "${vernum}")"

    local packages="flatcar-packages-${arch}"
    local packages_image="${packages}:${docker_vernum}"

    docker_image_from_buildcache "${packages}" "${docker_vernum}"

    local tests_dir="__TESTS__/${image}"
    mkdir -p "${tests_dir}"

    local container_name="flatcar-tests-${arch}-${docker_vernum}-${image}"

    local retry=""
    local success=false
    local tapfile=""
    local failfile=""
    local failed_tests=""
    for (( retry = 1; retry <= retries; retry++ )); do
        tapfile="results-run-${retry}.tap"
        failfile="failed-run-${retry}.txt"

        # "$@" hands every test pattern on as-is (no word splitting, no globbing).
        ./run_sdk_container -n "${container_name}" -C "${packages_image}" -v "${vernum}" \
            ci-automation/vendor-testing/"${image}".sh \
                "${tests_dir}" \
                "${arch}" \
                "${vernum}" \
                "${tapfile}" \
                "$@"

        ./run_sdk_container -n "${container_name}" -C "${packages_image}" -v "${vernum}" \
            ci-automation/test_update_reruns.sh \
                "${tests_dir}/${tapfile}" "${image}" "${retry}" \
                "${tests_dir}/${failfile}"

        failed_tests="$(cat "${tests_dir}/${failfile}")"
        if [ -z "$failed_tests" ] ; then
            echo "########### All tests succeeded. ###########"
            success=true
            break
        fi

        echo "########### Some tests failed and will be re-run. ###########"
        echo "Failed tests: $failed_tests"
        echo "-----------"

        # Only re-run the failed tests. Split the list on whitespace, but keep
        # test names from being expanded as file name patterns.
        set -o noglob
        # shellcheck disable=SC2086 # word splitting is intended here
        set -- ${failed_tests}
        set +o noglob
    done

    if ! $success; then
        echo "########### All re-runs exhausted ($retries). Giving up. ###########"
    fi

    # TODO: publish to bincache?
    # "${tests_dir}/"*.tap
    # "${tests_dir}/_kola_temp.tar.xz"
}
# --

View File

@ -0,0 +1,20 @@
#!/bin/bash
#
# Copyright (c) 2021 The Flatcar Maintainers.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Helper script for test.sh to update the test failures text file.
# test.sh uses this to determine which tests need to re-run.
# This script is run within the SDK container.
# pipefail: without it the status of the final pipeline below would be that of
# 'tee' alone. A failed DB query would then leave an empty failures file, which
# test.sh interprets as "all tests succeeded".
set -euo pipefail

# Positional parameters:
#   1: TAP file produced by the test run that just finished
#   2: image type; used as the vendor name in the results DB
#   3: number of the current (re-)run
#   4: file to write the names of the tests which need a re-run to
tapfile="$1"
image="$2"
retry="$3"
outfile="$4"

source ci-automation/tapfile_helper_lib.sh

tap_ingest_tapfile "${tapfile}" "${image}" "${retry}"
# Print the failed tests and store them in the failures file at the same time.
tap_failed_tests_for_vendor "${image}" | tee "${outfile}"

View File

@ -0,0 +1,38 @@
#!/bin/bash
#
# Copyright (c) 2021 The Flatcar Maintainers.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Test execution script for the qemu vendor image.
# This script is supposed to run in the SDK container.
# Fetch the qemu image from the build cache and run kola tests against it.
# INPUT:
#   1:    <work_dir> - directory to run in; created if missing. The function
#                      changes the current directory to it.
#   2:    <arch>     - architecture; selects the board ("<arch>-usr") and the
#                      build cache path of the image
#   3:    <vernum>   - OS image version; part of the build cache path
#   4:    <tapfile>  - TAP file kola writes the results to
#   5..n: <tests>    - tests / test patterns handed to kola
# NOTE(review): nothing in the visible part of this script calls
# run_testsuite - confirm the script's entry point invokes it with "$@".
function run_testsuite() {
    # Every shift moves the next parameter to $1.
    local work_dir="$1"; shift
    local arch="$1"; shift
    local vernum="$1"; shift
    local tapfile="$1"; shift

    # $@ now contains tests / test patterns to run

    source ci-automation/ci_automation_common.sh

    mkdir -p "${work_dir}"
    cd "${work_dir}"

    copy_from_buildcache "images/${arch}/${vernum}/${QEMU_IMAGE_NAME}" .

    # One single command: every line but the last needs its continuation.
    # "$@" passes the test patterns on verbatim, without globbing them.
    sudo kola run \
        --board="${arch}-usr" \
        --parallel="${QEMU_PARALLEL}" \
        --platform=qemu \
        --qemu-bios=/usr/share/qemu/bios-256k.bin \
        --qemu-image="${QEMU_IMAGE_NAME}" \
        --tapfile="${tapfile}" \
        --torcx-manifest="${CONTAINER_TORCX_ROOT}/${arch}-usr/latest/torcx_manifest.json" \
        "$@"
}