From f6f44e2ca8b5cbf2b6e2026efde421d8c76b8aa6 Mon Sep 17 00:00:00 2001
From: Thilo Fromm
Date: Wed, 16 Feb 2022 19:17:42 +0100
Subject: [PATCH] ci-automation: first stab at adding testing

Signed-off-by: Thilo Fromm
---
 ci-automation/ci-config.env          |   4 +
 ci-automation/tapfile_helper_lib.sh  | 301 +++++++++++++++++++++++++++
 ci-automation/test.sh                | 135 +++++++++++++++++
 ci-automation/test_update_reruns.sh  |  26 +++
 ci-automation/vendor-testing/qemu.sh |  47 ++++++
 5 files changed, 513 insertions(+)
 create mode 100644 ci-automation/tapfile_helper_lib.sh
 create mode 100644 ci-automation/test.sh
 create mode 100755 ci-automation/test_update_reruns.sh
 create mode 100755 ci-automation/vendor-testing/qemu.sh

diff --git a/ci-automation/ci-config.env b/ci-automation/ci-config.env
index 40f6a6a959..3e4f347802 100644
--- a/ci-automation/ci-config.env
+++ b/ci-automation/ci-config.env
@@ -22,3 +22,7 @@ CI_GIT_EMAIL="infra+ci@flatcar-linux.org"
 # build artifacts go here (in container)
 CONTAINER_TORCX_ROOT="/home/sdk/build/torcx"
 CONTAINER_IMAGE_ROOT="/home/sdk/build/images"
+
+# Image / vendor tests settings
+QEMU_IMAGE_NAME="flatcar_production_image.bin"
+QEMU_PARALLEL=4
diff --git a/ci-automation/tapfile_helper_lib.sh b/ci-automation/tapfile_helper_lib.sh
new file mode 100644
index 0000000000..0cd7f3efa4
--- /dev/null
+++ b/ci-automation/tapfile_helper_lib.sh
@@ -0,0 +1,301 @@
+#!/bin/bash
+#
+# Copyright (c) 2021 The Flatcar Maintainers.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Helper script for extracting information from TAP files and for merging multiple
+# TAP files into one report.
+# The script uses a temporary SQLite DB for querying and for result generation.
+#
+# Brief usage overview (scroll down for parameters etc.):
+#   tap_ingest_tapfile - add test results from tap file to the DB
+#   tap_list_vendors - list all vendors TAP files have been ingested for
+#   tap_failed_tests_for_vendor - list all tests that never succeeded even once, per vendor
+#   tap_generate_report - generate a merged test report
+
+
+TAPFILE_HELPER_DBNAME="results.sqlite3"
+
+# Wrapper around sqlite3 w/ retries if DB is locked.
+# INPUT:
+#   All arguments are handed to sqlite3 verbatim, in the given order
+#   (callers pass the DB file and SQL, some pass "-csv" first).
+# Returns the exit status of the last sqlite3 invocation.
+function __sqlite3_wrapper() {
+    local dbfile="$1"
+    shift
+
+    local ret
+    while true; do
+        # Capture the status right away: "$?" is overwritten by every
+        # subsequent command (including the "[" test below), and the "||"
+        # keeps a caller's "set -e" from aborting before we can retry.
+        ret=0
+        sqlite3 "${dbfile}" "$@" || ret=$?
+        # 5 is SQLite's SQLITE_BUSY result code; treated as "DB is locked".
+        if [ "${ret}" -ne 5 ] ; then
+            return "${ret}"
+        fi
+        local sleep="$((1 + RANDOM % 5))"
+        echo "Retrying in ${sleep} seconds." >&2
+        sleep "${sleep}"
+    done
+}
+# --
+
+# Initialise the DB if it wasn't yet.
+# Creates the "test_case", "vendor", and "test_run" tables if missing.
+function __db_init() {
+    local dbname="${TAPFILE_HELPER_DBNAME}"
+
+    __sqlite3_wrapper "${dbname}" '
+        CREATE TABLE IF NOT EXISTS "test_case" (
+            "id" INTEGER,
+            "name" TEXT UNIQUE,
+            PRIMARY KEY("id")
+        );
+        CREATE TABLE IF NOT EXISTS "vendor" (
+            "id" INTEGER,
+            "name" TEXT UNIQUE,
+            PRIMARY KEY("id")
+        );
+        CREATE TABLE IF NOT EXISTS "test_run" (
+            "id" INTEGER NOT NULL,
+            "result" INTEGER NOT NULL,
+            "output" TEXT,
+            "case_id" INTEGER NOT NULL,
+            "run" INTEGER NOT NULL,
+            "vendor_id" INTEGER,
+            PRIMARY KEY("id"),
+            FOREIGN KEY("case_id") REFERENCES "test_case"("id"),
+            FOREIGN KEY("vendor_id") REFERENCES "vendor"("id"),
+            UNIQUE (case_id, run, vendor_id)
+        );
+'
+}
+# --
+
+# Read tapfile into temporary DB.
+# INPUT:
+#   1: - tapfile to ingest
+#   2: - vendor (qemu, azure, aws, etc...)
+#   3: - re-run iteration
+
+function tap_ingest_tapfile() {
+    local tapfile="${1}"
+    local vendor="${2}"
+    local run="${3}"
+
+    local dbname="${TAPFILE_HELPER_DBNAME}"
+
+    local result=""
+    local test_name=""
+    local error_message=""
+    local in_error_message=false
+    local line=""
+    local result_string=""
+
+    if ! [ -f "${TAPFILE_HELPER_DBNAME}" ] ; then
+        __db_init
+    fi
+
+    # Wrap all SQL commands in a transaction to speed up INSERTs
+    local SQL="BEGIN TRANSACTION;"
+
+    # Example TAP input:
+    # ok - coreos.auth.verify
+    # ok - coreos.locksmith.tls
+    # not ok - cl.filesystem
+    #   ---
+    #   Error: "--- FAIL: cl.filesystem/deadlinks (1.86s)\n files.go:90: Dead symbolic links found: [/var/lib/flatcar-oem-gce/usr/lib64/python3.9/site-packages/certifi-3021.3.16-py3.9.egg-info]"
+    #   ...
+    # ok - cl.cloudinit.script
+    # ok - kubeadm.v1.22.0.flannel.base
+    #
+    # The "|| [ -n ... ]" also handles a last line lacking a trailing newline.
+    while read -r line || [ -n "${line}" ] ; do
+        # Neither the plan ("1..N") nor empty lines are test results.
+        if [ -z "${line}" ] || [[ "${line}" == "1.."* ]] ; then continue; fi
+        if [ "${line}" = "---" ] ; then # note: read removes leading whitespaces
+            in_error_message=true
+            continue
+        fi
+
+        if $in_error_message ; then
+            if [ "${line}" = "..." ] ; then
+                # End of diagnostics: fall through to the INSERTs below so the
+                # message replaces the row written for the "not ok" line.
+                in_error_message=false
+            else
+                error_message="$(echo -e "$line" \
+                    | sed -e 's/^Error: "--- FAIL: /"/' -e 's/^[[:space:]]*//' \
+                    | sed -e "s/[>\"']/_/g" -e 's/[[:space:]]/ /g')"
+                continue
+            fi
+        else
+            test_name="$(echo "${line}" | sed 's/^[^-]* - //')"
+            result_string="$(echo "${line}" | sed 's/ - .*//')"
+            result=0
+            if [ "${result_string}" = "ok" ] ; then
+                result=1
+            fi
+        fi
+
+        SQL="${SQL}INSERT OR IGNORE INTO test_case(name) VALUES ('${test_name}');"
+        SQL="${SQL}INSERT OR IGNORE INTO vendor(name) VALUES ('${vendor}');"
+
+        SQL="${SQL}INSERT OR REPLACE INTO test_run(run,result,output,case_id,vendor_id)
+                VALUES ('${run}','${result}', '${error_message}',
+                    (SELECT id FROM test_case WHERE name='${test_name}'),
+                    (SELECT id FROM vendor WHERE name='${vendor}'));"
+        error_message=""
+    done < "$tapfile"
+
+    SQL="${SQL}COMMIT;"
+
+    __sqlite3_wrapper "${dbname}" "${SQL}"
+}
+# --
+
+# Print a list of all vendors we've seen so far.
+function tap_list_vendors() {
+    local dbname="${TAPFILE_HELPER_DBNAME}"
+
+    __sqlite3_wrapper "${dbname}" 'SELECT DISTINCT name from vendor;'
+}
+# --
+
+# List tests that never succeeded for a given vendor.
+# INPUT:
+#   1: - Vendor name to check for failed test runs
+function tap_failed_tests_for_vendor() {
+    local vendor="$1"
+
+    local dbname="${TAPFILE_HELPER_DBNAME}"
+
+    __sqlite3_wrapper "${dbname}" "
+        SELECT failed.name FROM test_case AS failed
+        WHERE EXISTS (
+            SELECT * FROM test_run AS t, vendor AS v, test_case AS c
+                WHERE t.vendor_id=v.id AND t.case_id=c.id
+                    AND v.name='${vendor}'
+                    AND c.name=failed.name
+            )
+        AND NOT exists (
+            SELECT * FROM test_run AS t, vendor AS v, test_case AS c
+                WHERE t.vendor_id=v.id AND t.case_id=c.id
+                    AND v.name='${vendor}'
+                    AND c.name=failed.name
+                    AND t.result=1 );"
+}
+# --
+
+# Print the tap file from contents of the database.
+# INPUT:
+#   1: - Architecture to be included in the first line of the report
+#   2: - OS version tested, to be included in the first line of the report
+#   3: - If set to "true" then debug output of transient test failures
+#        is included in the result report.
+function tap_generate_report() {
+    local arch="$1"
+    local version="$2"
+    local full_error_report="${3:-false}"
+
+    local dbname="${TAPFILE_HELPER_DBNAME}"
+
+    local count="$(__sqlite3_wrapper "${dbname}" 'SELECT count(name) FROM test_case;')"
+    local vendors="$(__sqlite3_wrapper "${dbname}" 'SELECT name FROM vendor;' | tr '\n' ' ')"
+
+    echo "1..$((count+1))"
+    echo "ok - Version: ${version}, Architecture: ${arch}"
+    echo " ---"
+    echo " Platforms tested: ${vendors}"
+    echo " ..."
+
+    # Print result line for every test, including platforms it succeeded on
+    # and transient failed runs.
+    __sqlite3_wrapper "${dbname}" 'SELECT DISTINCT name from test_case;' | \
+    while read -r test_name; do
+
+        # "ok" if the test succeeded at least once for all vendors that run the test,
+        # "not ok" otherwise.
+        local verdict="$(__sqlite3_wrapper "${dbname}" "
+            SELECT failed.name FROM vendor AS failed
+            WHERE EXISTS (
+                SELECT * FROM test_run AS t, vendor AS v, test_case AS c
+                    WHERE t.vendor_id=v.id AND t.case_id=c.id
+                        AND v.name=failed.name
+                        AND c.name='${test_name}'
+                )
+            AND NOT exists (
+                SELECT * FROM test_run AS t, vendor AS v, test_case AS c
+                    WHERE t.vendor_id=v.id AND t.case_id=c.id
+                        AND v.name=failed.name
+                        AND c.name='${test_name}'
+                        AND t.result=1 );
+            ")"
+        if [ -n "${verdict}" ] ; then
+            verdict="not ok"
+        else
+            verdict="ok"
+        fi
+
+        # Generate a list of vendors and respective runs, in a single line.
+        function list_runs() {
+            local res="$1"
+            __sqlite3_wrapper -csv "${dbname}" "
+                SELECT v.name, t.run FROM test_run AS t, vendor AS v, test_case AS c
+                    WHERE t.vendor_id=v.id AND t.case_id=c.id
+                        AND c.name='${test_name}'
+                        AND t.result=${res}
+                    ORDER BY v.name;" \
+            | awk -F, '{ if (t && (t != $1)) {
+                             printf t " " r "); "
+                             r="";}
+                         t=$1
+                         if (r)
+                             r=r ", " $2
+                         else
+                             r="(" $2 ; }
+                       END { if (t) print t r ")"; }'
+        }
+
+        local succeded="$(list_runs 1)"
+        local failed="$(list_runs 0)"
+
+        echo "${verdict} - ${test_name}"
+        echo " ---"
+        if [ -n "${succeded}" ] ; then
+            echo " Succeeded: ${succeded}"
+        fi
+        if [ -n "${failed}" ] ; then
+            echo " Failed: ${failed}"
+            if [ "${verdict}" = "not ok" ] || [ "${full_error_report}" = "true" ] ; then
+                # generate diagnostic output, per failed run.
+                __sqlite3_wrapper -csv "${dbname}" "
+                    SELECT v.name, t.run
+                        FROM test_run AS t, vendor AS v, test_case AS c
+                        WHERE t.vendor_id=v.id AND t.case_id=c.id
+                            AND c.name='${test_name}'
+                            AND t.result=0
+                        ORDER BY t.run DESC;" | \
+                sed 's/,/ /' | \
+                while read -r vendor run; do
+                    echo " Error messages for ${vendor}, run ${run}:"
+                    # Select by vendor, too: run numbers are only unique per test and vendor.
+                    __sqlite3_wrapper -csv "${dbname}" "
+                        SELECT t.output FROM test_run AS t, vendor AS v, test_case AS c
+                            WHERE t.vendor_id=v.id AND t.case_id=c.id
+                                AND c.name='${test_name}'
+                                AND v.name='${vendor}'
+                                AND t.run='${run}';" | \
+                    sed 's/"/ /' | \
+                    awk '{print " LINE " NR":" $0}'
+                done
+            fi
+        fi
+        echo " ..."
+    done
+}
+# --
diff --git a/ci-automation/test.sh b/ci-automation/test.sh
new file mode 100644
index 0000000000..95197dc736
--- /dev/null
+++ b/ci-automation/test.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+#
+# Copyright (c) 2021 The Flatcar Maintainers.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# >>> This file is supposed to be SOURCED from the repository ROOT. <<<
+#
+# test_run() should be called w/ the positional INPUT parameters below.
+
+# Test scenarios runner stub.
+# This script will run test scenarios for a single image type.
+# Tests will be started inside a container based on the packages container image
+# (which contains the torcx manifest).
+# This script is generic and will use a vendor-specific test runner from
+# "ci-automation/vendor-testing/[IMAGE].sh".
+#
+# PREREQUISITES:
+#
+#   1. SDK version and OS image version are recorded in sdk_container/.repo/manifests/version.txt
+#   2. Scripts repo version tag of OS image version to be built is available and checked out.
+#   3. Flatcar packages container is available via build cache server
+#      from "/containers/[VERSION]/flatcar-packages-[ARCH]-[FLATCAR_VERSION].tar.gz"
+#      or present locally. Container must contain binary packages and torcx artefacts.
+#   4. Vendor image(s) to run tests for are available on buildcache ( images/[ARCH]/[FLATCAR_VERSION]/ )
+#
+# INPUT:
+#
+#   1. Architecture (ARCH) of the TARGET vm images ("arm64", "amd64").
+#   2. Image type to be tested. One of:
+#      ami, azure, azure_pro, digitalocean, gce, gce_pro, packet, qemu, qemu_uefi, vmware
+#
+# OPTIONAL INPUT:
+#
+#   3. List of tests / test patterns. Defaults to "*" (all tests).
+#      All positional arguments after the first 2 (see above) are tests / patterns of tests to run.
+#
+#   MAX_RETRIES. Environment variable. Number of re-runs to overcome transient failures. Defaults to 999.
+#
+# OUTPUT:
+#
+#   1. 2 merged TAP reports with all test runs / vendors.
+#      - a "summary" report which contains error messages only for tests which never succeeded (per vendor).
+#      - a "detailed" report which also contains error messages of transient failures which succeeded after re-runs.
+#      These reports will be updated after each (re-)run of each vendor, making the test job safe
+#      to abort at any point - the previous runs' results won't be lost.
+#   2. "./ci-cleanup.sh" with commands to clean up temporary build resources,
+#      to be run after this step finishes / when this step is aborted.
+
+set -eu
+
+function test_run() {
+    # Each "shift" moves the next argument to $1, so always read $1.
+    local arch="$1"; shift
+    local image="$1"; shift
+
+    # default to all tests
+    if [ $# -le 0 ] ; then
+        # Quoted, so the pattern is passed on literally instead of being
+        # expanded to the file names in the current directory.
+        set -- '*'
+    fi
+
+    local retries="${MAX_RETRIES:-999}"
+
+    source ci-automation/tapfile_helper_lib.sh
+    source ci-automation/ci_automation_common.sh
+    init_submodules
+
+    source sdk_container/.repo/manifests/version.txt
+    local vernum="${FLATCAR_VERSION}"
+    local docker_vernum="$(vernum_to_docker_image_version "${vernum}")"
+
+    local packages="flatcar-packages-${arch}"
+    local packages_image="${packages}:${docker_vernum}"
+
+    docker_image_from_buildcache "${packages}" "${docker_vernum}"
+
+    local tests_dir="__TESTS__/${image}"
+    mkdir -p "${tests_dir}"
+
+    local container_name="flatcar-tests-${arch}-${docker_vernum}-${image}"
+
+    local retry=""
+    local success=false
+    for retry in $(seq "${retries}"); do
+        local tapfile="results-run-${retry}.tap"
+        local failfile="failed-run-${retry}.txt"
+
+        # Test failures are evaluated from the TAP file below, so a failing
+        # test run must not trip "set -e" before the re-run logic gets a chance.
+        # NOTE(review): assumes run_sdk_container hands back the test
+        # runner's exit status - confirm.
+        if ! ./run_sdk_container -n "${container_name}" -C "${packages_image}" -v "${vernum}" \
+                ci-automation/vendor-testing/"${image}".sh \
+                    "${tests_dir}" \
+                    "${arch}" \
+                    "${vernum}" \
+                    "${tapfile}" \
+                    "$@" ; then
+            echo "########### Test run ${retry} reported failures. ###########"
+        fi
+
+        ./run_sdk_container -n "${container_name}" -C "${packages_image}" -v "${vernum}" \
+            ci-automation/test_update_reruns.sh \
+                "${tests_dir}/${tapfile}" "${image}" "${retry}" \
+                "${tests_dir}/${failfile}"
+
+        local failed_tests="$(cat "${tests_dir}/${failfile}")"
+        if [ -z "$failed_tests" ] ; then
+            echo "########### All tests succeeded. ###########"
+            success=true
+            break
+        fi
+
+        echo "########### Some tests failed and will be re-run. ###########"
+        echo "Failed tests: $failed_tests"
+        echo "-----------"
+        # Intentionally unquoted: one positional parameter per failed test.
+        # Globbing is disabled so that test names are never expanded.
+        set -o noglob
+        set -- $failed_tests
+        set +o noglob
+    done
+
+    if ! $success; then
+        echo "########### All re-runs exhausted ($retries). Giving up. ###########"
+    fi
+
+    # TODO: publish to bincache?
+    #  "${tests_dir}/"*.tap
+    #  "${tests_dir}/_kola_temp.tar.xz"
+
+}
+# --
diff --git a/ci-automation/test_update_reruns.sh b/ci-automation/test_update_reruns.sh
new file mode 100755
index 0000000000..e559158f31
--- /dev/null
+++ b/ci-automation/test_update_reruns.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Copyright (c) 2021 The Flatcar Maintainers.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Helper script for test.sh to update the test failures text file.
+# test.sh uses this to determine which tests need to re-run.
+# This script is run within the SDK container.
+#
+# INPUT:
+#   1. TAP file with the results of the latest test run
+#   2. Image type (vendor) the tests ran for
+#   3. Number of the current (re-)run
+#   4. File to write the names of the tests that never succeeded to
+
+set -eu
+
+tapfile="$1"
+image="$2"
+retry="$3"
+outfile="$4"
+
+source ci-automation/tapfile_helper_lib.sh
+tap_ingest_tapfile "${tapfile}" "${image}" "${retry}"
+tap_failed_tests_for_vendor "${image}" | tee "${outfile}"
diff --git a/ci-automation/vendor-testing/qemu.sh b/ci-automation/vendor-testing/qemu.sh
new file mode 100755
index 0000000000..3f2dc95cff
--- /dev/null
+++ b/ci-automation/vendor-testing/qemu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Copyright (c) 2021 The Flatcar Maintainers.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Test execution script for the qemu vendor image.
+# This script is supposed to run in the SDK container.
+#
+# INPUT:
+#   1. Working directory for the test run (created if missing)
+#   2. Architecture of the image to test
+#   3. OS image version to test
+#   4. TAP file to write test results to (kola runs inside the working directory)
+#   5+ Tests / test patterns to run
+
+set -eu
+
+function run_testsuite() {
+    # Each "shift" moves the next argument to $1, so always read $1.
+    local work_dir="$1"; shift
+    local arch="$1"; shift
+    local vernum="$1"; shift
+    local tapfile="$1"; shift
+
+    # $@ now contains tests / test patterns to run
+
+    source ci-automation/ci_automation_common.sh
+
+    mkdir -p "${work_dir}"
+    cd "${work_dir}"
+
+    copy_from_buildcache "images/${arch}/${vernum}/${QEMU_IMAGE_NAME}" .
+
+    # "$@" is quoted so test patterns like "*" reach kola unexpanded.
+    sudo kola run \
+        --board="${arch}-usr" \
+        --parallel="${QEMU_PARALLEL}" \
+        --platform=qemu \
+        --qemu-bios=/usr/share/qemu/bios-256k.bin \
+        --qemu-image="${QEMU_IMAGE_NAME}" \
+        --tapfile="${tapfile}" \
+        --torcx-manifest="${CONTAINER_TORCX_ROOT}/${arch}-usr/latest/torcx_manifest.json" \
+        "$@"
+}
+
+run_testsuite "$@"