mirror of
https://github.com/siderolabs/omni.git
synced 2026-03-31 05:32:00 +02:00
Add a two-phase approach to the helm e2e test: first install Omni with embedded etcd and run a smoke test (omnictl get defaultjointoken), then uninstall and reinstall with external etcd for the full integration suite. Other changes: - Extract reusable extract_sa_key function - Split helm values into base + external etcd overlay to remove duplication - Move helm test values to hack/test/helm/templates/ and drop .envsubst suffix - Fix empty string arg bug in configure_registry_mirrors (remove dead else branch) Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
278 lines
11 KiB
Bash
Executable File
278 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
# Copyright (c) 2026 Sidero Labs, Inc.
|
|
#
|
|
# Use of this software is governed by the Business Source License
|
|
# included in the LICENSE file.
|
|
|
|
# Strict mode: abort on errors, unset variables and failed pipeline stages;
# trace every command (xtrace) so CI logs show what was executed.
set -o errexit -o nounset -o xtrace -o pipefail

# Pull in the shared test helpers and variables.
# The relative path means the script has to be started from the directory that contains hack/.
source ./hack/test/common.sh

# Directory that receives the cluster diagnostics collected at exit.
DIAG_DIR="${TEST_OUTPUTS_DIR}/cluster-diagnostics"
|
|
|
|
# Run a command and store its combined stdout/stderr in a file below DIAG_DIR.
# Failures are ignored on purpose: diagnostics are best-effort.
# Arguments:
#   $1   - report file name, relative to DIAG_DIR
#   $2.. - command (and its arguments) to execute
function _diag_capture() {
  local report_name=$1
  shift
  "$@" >"${DIAG_DIR}/${report_name}" 2>&1 || true
}

# Collect a best-effort snapshot of the host cluster into DIAG_DIR:
# resource listings, descriptions, resource usage, Helm releases and the
# logs of every pod. No individual failure aborts the collection.
# Globals: DIAG_DIR (read)
function gather_cluster_diagnostics() {
  mkdir -p "${DIAG_DIR}/logs"

  echo "Gathering cluster diagnostics into ${DIAG_DIR}..."

  _diag_capture cluster-info-dump.txt kubectl cluster-info dump
  _diag_capture nodes.txt kubectl get nodes -o wide
  _diag_capture all-resources.txt kubectl get all --all-namespaces -o wide
  _diag_capture events.txt kubectl get events --all-namespaces --sort-by='.lastTimestamp'
  _diag_capture node-describe.txt kubectl describe nodes
  _diag_capture pod-describe.txt kubectl describe pods --all-namespaces
  _diag_capture deployment-describe.txt kubectl describe deployments --all-namespaces
  _diag_capture statefulset-describe.txt kubectl describe statefulsets --all-namespaces
  _diag_capture daemonset-describe.txt kubectl describe daemonsets --all-namespaces
  _diag_capture top-nodes.txt kubectl top nodes
  _diag_capture top-pods.txt kubectl top pods --all-namespaces
  _diag_capture helm-releases.txt helm list --all-namespaces

  # Dump current and previous logs of every pod in every namespace.
  # The pod list is emitted as "<namespace>/<name>" lines and fed through a
  # pipe (not process substitution) to avoid /dev/fd issues under sudo.
  kubectl get pods --all-namespaces -o jsonpath='{range .items[*]}{.metadata.namespace}/{.metadata.name}{"\n"}{end}' 2>/dev/null |
    while IFS='/' read -r pod_ns pod_name; do
      if [[ -n "${pod_ns}" ]]; then
        kubectl logs -n "${pod_ns}" "${pod_name}" --all-containers >"${DIAG_DIR}/logs/${pod_ns}_${pod_name}.log" 2>&1 || true
        # Errors for --previous are discarded rather than written to the log file.
        kubectl logs -n "${pod_ns}" "${pod_name}" --all-containers --previous >"${DIAG_DIR}/logs/${pod_ns}_${pod_name}-previous.log" 2>/dev/null || true
      fi
    done || true

  echo "Cluster diagnostics gathered."
}
|
|
|
|
# Name of the Talos cluster that hosts the Helm-deployed Omni instance.
HOST_CLUSTER_NAME='test-e2e-helm'
# Name of the machine set that is created later to join that Omni instance.
TEST_MACHINES_CLUSTER_NAME='test-helm-machines'
|
|
|
|
# Extract the initial service account key from the Omni pod via an ephemeral debug container.
#
# Arguments:
#   $1 - namespace the Omni pod runs in
#   $2 - local path the key file is copied to
# Outputs:
#   Progress messages and the extracted key on stdout; the timeout error on stderr.
# Exits the script with status 1 when the ephemeral container is not reported
# as running after 30 polls (2 seconds apart).
function extract_sa_key() {
  local namespace=$1
  local output_path=$2
  local -r max_attempts=30

  local pod
  pod=$(kubectl get pod -n "${namespace}" -l app.kubernetes.io/name=omni -o jsonpath="{.items[0].metadata.name}")

  # Attach a busybox container that shares the process namespace of the "omni" container.
  kubectl debug -n "${namespace}" "${pod}" \
    --image=busybox:1.36 --target=omni --profile=sysadmin --share-processes -- sleep 600

  echo "Waiting for ephemeral container to be running..."

  # Both loop variables are function-local so nothing leaks into the caller's scope.
  local attempt status
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # A non-empty "running" state of the most recent ephemeral container means it has started.
    status=$(kubectl get pod "${pod}" -n "${namespace}" -o jsonpath="{.status.ephemeralContainerStatuses[-1:].state.running}" 2>/dev/null || true)
    if [[ -n "${status}" ]]; then
      echo "Ephemeral container is running."
      break
    fi

    if ((attempt == max_attempts)); then
      echo "Timed out waiting for ephemeral container to start." >&2
      exit 1
    fi

    sleep 2
  done

  local ephemeral_container
  ephemeral_container=$(kubectl get pod "${pod}" -n "${namespace}" -o jsonpath="{.spec.ephemeralContainers[-1:].name}")

  # The key is read through /proc/1/root of the debug container.
  # NOTE(review): presumably PID 1 is the Omni process thanks to --target/--share-processes - confirm.
  kubectl cp "${namespace}/${pod}:/proc/1/root/tmp/initial-service-account-key" "${output_path}" -c="${ephemeral_container}"

  echo "Service account key extracted to ${output_path}"
  # NOTE(review): this prints the key into the job log; acceptable only for throwaway test clusters.
  cat "${output_path}"
}
|
|
|
|
# Exit handler: collect cluster diagnostics, run the shared cleanup, then hand
# the output directories back to the invoking user. Every step is best-effort
# so a failure in one never prevents the following ones from running.
# Globals: TEST_OUTPUTS_DIR, ARTIFACTS, SUDO_USER (all read)
function cleanup() {
  # Diagnostics are collected before common_cleanup runs.
  # NOTE(review): presumably common_cleanup tears the test clusters down - confirm in common.sh.
  gather_cluster_diagnostics || true
  common_cleanup || true

  # Ensure the output directories are always accessible by the non-root CI user for artifact upload.
  local owned_dir
  for owned_dir in "${TEST_OUTPUTS_DIR}" "${ARTIFACTS}"; do
    chown -R "${SUDO_USER:-$(whoami)}" "${owned_dir}" || true
  done
}
|
|
|
|
# Run cleanup exactly once, whatever the exit path.
# Registering cleanup directly for SIGINT would run it as the signal handler
# and then let the script resume (and run cleanup a second time on EXIT).
# Instead SIGINT only turns into a regular exit, which fires the EXIT trap.
trap cleanup EXIT
trap 'exit 130' SIGINT

# Download required artifacts.
prepare_artifacts

# Configure registry mirrors.
configure_registry_mirrors

# Point kubectl/helm at the kubeconfig of the host cluster.
export KUBECONFIG="${TEST_OUTPUTS_DIR}/${HOST_CLUSTER_NAME}/kubeconfig"
|
|
|
|
# A registry on the loopback address (e.g. when running this test locally) is
# not reachable from inside the Talos node, so add a mirror that routes it via
# the bridge gateway on the same port.
: "${TEMP_REGISTRY:=127.0.0.1:5005}"
case "${TEMP_REGISTRY}" in
  127.0.0.1:*)
    REGISTRY_MIRROR_FLAGS+=("--registry-mirror=${TEMP_REGISTRY}=http://172.24.0.1:${TEMP_REGISTRY##*:}")
    ;;
esac

# Prepare the single-node Talos cluster; the control plane also runs workloads.
create_talos_cluster \
  name="${HOST_CLUSTER_NAME}" \
  cp_count=1 \
  wk_count=0 \
  cidr=172.24.0.0/24 \
  talos_version="${TALOS_VERSION}" \
  skip_kubeconfig=false \
  allow_scheduling_on_control_planes=true

kubectl get node -owide --show-labels
|
|
|
|
# Determine the node IP dynamically.
NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')

# A jsonpath filter that matches nothing yields empty output with exit status 0,
# so check explicitly instead of writing a malformed /etc/hosts entry.
if [[ -z "${NODE_IP}" ]]; then
  echo "ERROR: could not determine the InternalIP of the cluster node" >&2
  exit 1
fi

echo "Node IP: ${NODE_IP}"

# Add DNS entries for the test domains.
# example.org is needed for workload proxy: the test client resolves the base domain to connect, then uses the Host header for routing.
echo "${NODE_IP} example.org omni.example.org omni-siderolink.example.org omni-k8s.example.org" >>/etc/hosts
|
|
|
|
# Generate a short-lived leaf TLS certificate signed by the committed CA.
# The generator runs in a plain assignment so that its failure aborts the
# script; inside a here-string the exit status would be silently dropped.
LEAF_CERT_PATHS=$(hack/test/helm/generate-leaf-cert.sh "${NODE_IP}")

# The output is consumed as two whitespace/newline separated tokens:
# the certificate path followed by the key path.
read -r TLS_CRT TLS_KEY <<<"${LEAF_CERT_PATHS//$'\n'/ }"

# Create the TLS secret for Traefik's default certificate store.
kubectl create secret tls example-org-wildcard-tls \
  --cert="${TLS_CRT}" \
  --key="${TLS_KEY}" \
  -n kube-system
|
|
|
|
# Install the Traefik ingress controller from its upstream chart repository
# into kube-system and block until the release is ready (5 minute limit).
helm repo add traefik https://traefik.github.io/charts
helm repo update traefik

traefik_release_opts=(
  --namespace kube-system
  --values hack/test/helm/traefik-values.yaml
  --wait
  --timeout 300s
)
helm upgrade --install traefik traefik/traefik "${traefik_release_opts[@]}"
|
|
|
|
# Deploy a single-node external etcd for Omni's storage backend.
# The generator runs in a plain assignment so that its failure aborts the
# script; inside a here-string the exit status would be silently dropped.
ETCD_CERT_PATHS=$(hack/test/helm/generate-etcd-certs.sh)

# The output is consumed as four whitespace/newline separated tokens, in this order:
# server certificate, server key, client certificate, client key.
read -r ETCD_SERVER_CRT ETCD_SERVER_KEY ETCD_CLIENT_CRT ETCD_CLIENT_KEY <<<"${ETCD_CERT_PATHS//$'\n'/ }"

kubectl apply -f hack/test/helm/etcd.yaml

# Server-side certificates for etcd; the client pair is stored later in the Omni namespace.
kubectl create secret generic etcd-certs -n etcd \
  --from-file=ca.crt=hack/test/helm/certs/ca.crt \
  --from-file=server.crt="${ETCD_SERVER_CRT}" \
  --from-file=server.key="${ETCD_SERVER_KEY}"

kubectl rollout status deployment/etcd -n etcd --timeout=120s
|
|
|
|
# Build the Omni image and push it to the temp registry so the Helm chart can
# pull it. The tag is derived from the short hash of the current commit.
OMNI_IMAGE_REPO="${TEMP_REGISTRY}/siderolabs/omni"
OMNI_IMAGE_TAG="test-$(git rev-parse --short HEAD)"

printf '%s\n' "Building and pushing omni image to ${OMNI_IMAGE_REPO}:${OMNI_IMAGE_TAG}..."
make image-omni \
  REGISTRY="${TEMP_REGISTRY}" \
  TAG="${OMNI_IMAGE_TAG}" \
  PUSH=true \
  PLATFORM=linux/amd64 \
  WITH_DEBUG=true

# Run the Helm chart unit tests (the plugin has to be installed first).
make helm-plugin-install
make chart-unittest
|
|
|
|
# Namespace that receives the Omni Helm release.
OMNI_NAMESPACE="omni"

# Omni requires NET_ADMIN and hostPath (/dev/net/tun), so the namespace must
# enforce the privileged pod security level.
kubectl create namespace "${OMNI_NAMESPACE}"
kubectl label namespace "${OMNI_NAMESPACE}" pod-security.kubernetes.io/enforce=privileged

# Client-side etcd credentials, stored next to Omni so it can connect to the external etcd.
etcd_client_secret_files=(
  --from-file=ca.crt=hack/test/helm/certs/ca.crt
  --from-file=client.crt="${ETCD_CLIENT_CRT}"
  --from-file=client.key="${ETCD_CLIENT_KEY}"
)
kubectl create secret generic etcd-client-certs -n "${OMNI_NAMESPACE}" "${etcd_client_secret_files[@]}"
|
|
|
|
# Print a compact JSON array of registry mirror specs.
# Arguments:
#   zero or more flags of the form --registry-mirror=<spec>; the prefix is
#   stripped and flags with an empty spec are skipped
# Outputs:
#   the JSON array on stdout ("[]" when nothing remains)
function registry_mirrors_json() {
  local flag mirror
  local -a items=()

  for flag in "$@"; do
    mirror="${flag#--registry-mirror=}"
    [[ -n "${mirror}" ]] || continue

    # Escape backslashes and double quotes so every element is a valid JSON string.
    mirror=${mirror//\\/\\\\}
    mirror=${mirror//\"/\\\"}
    items+=("\"${mirror}\"")
  done

  local joined=""
  if ((${#items[@]} > 0)); then
    # Join with commas; the trailing one is dropped below.
    printf -v joined '%s,' "${items[@]}"
  fi

  printf '[%s]\n' "${joined%,}"
}

# Build a JSON array of registry mirrors for envsubst.
# The ${array[@]+...} form keeps an unset or empty array from tripping "set -u".
REGISTRY_MIRRORS_JSON=$(registry_mirrors_json ${REGISTRY_MIRROR_FLAGS[@]+"${REGISTRY_MIRROR_FLAGS[@]}"})
export REGISTRY_MIRRORS_JSON
|
|
|
|
# Variables consumed by envsubst when the Helm values template is rendered.
export OMNI_IMAGE_REPO OMNI_IMAGE_TAG
export AUTH0_CLIENT_ID AUTH0_DOMAIN AUTH0_TEST_USERNAME
export NODE_IP JOIN_TOKEN

# ============================================================
# Phase 1: Smoke test with embedded etcd
# ============================================================

# Render the base values once; the same file is reused by the external etcd install.
RENDERED_BASE_VALUES=$(mktemp)
envsubst <hack/test/helm/templates/omni-values.yaml >"${RENDERED_BASE_VALUES}"

echo "Installing Omni with embedded etcd..."
omni_embedded_release_opts=(
  --namespace "${OMNI_NAMESPACE}"
  --values "${RENDERED_BASE_VALUES}"
  --wait
  --timeout 300s
)
helm upgrade --install omni deploy/helm/omni/ "${omni_embedded_release_opts[@]}"

# Block until the Omni pod reports Ready before talking to it.
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=omni -n "${OMNI_NAMESPACE}" --timeout=120s
|
|
|
|
# Fetch the service account key of the freshly installed instance.
extract_sa_key "${OMNI_NAMESPACE}" "${ARTIFACTS}/omni-sa-key"

echo "Running embedded etcd smoke test..."

# Query the default join token through the ingress, trusting the test CA.
OMNICTL_OUTPUT=$(OMNI_ENDPOINT=https://omni.example.org \
  OMNI_SERVICE_ACCOUNT_KEY=$(cat "${ARTIFACTS}/omni-sa-key") \
  SSL_CERT_DIR=hack/test/helm/certs:/etc/ssl/certs \
  "${ARTIFACTS}"/omnictl-linux-amd64 get defaultjointoken -oyaml)

echo "${OMNICTL_OUTPUT}"

# Feed grep from a here-string rather than "echo |": with pipefail an early
# exit of "grep -q" can make the writer die of SIGPIPE and fail the pipeline
# although the token was found. -F matches the token literally, not as a regex.
if ! grep -qF -- "tokenid: ${JOIN_TOKEN}" <<<"${OMNICTL_OUTPUT}"; then
  echo "ERROR: expected 'tokenid: ${JOIN_TOKEN}' in omnictl output" >&2
  exit 1
fi

echo "Embedded etcd smoke test passed."
|
|
|
|
# Tear the embedded etcd release down and make sure its pods are gone before
# the release name is reused for the external etcd install.
printf '%s\n' "Uninstalling Omni (embedded etcd)..."
helm uninstall omni \
  --namespace "${OMNI_NAMESPACE}" \
  --wait \
  --timeout 120s
kubectl wait \
  --for=delete pod \
  -l app.kubernetes.io/name=omni \
  -n "${OMNI_NAMESPACE}" \
  --timeout=120s
|
|
|
|
# ============================================================
# Phase 2: Install with external etcd (main test)
# ============================================================
printf '%s\n' "Installing Omni with external etcd..."

# The external etcd overlay is passed after the base values so that its settings take precedence.
omni_external_release_opts=(
  --namespace "${OMNI_NAMESPACE}"
  --values "${RENDERED_BASE_VALUES}"
  --values hack/test/helm/templates/omni-external-etcd-values.yaml
  --wait
  --timeout 300s
)
helm upgrade --install omni deploy/helm/omni/ "${omni_external_release_opts[@]}"

# Wait for Omni pod to be ready.
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=omni -n "${OMNI_NAMESPACE}" --timeout=120s

# Extract the key again after the reinstall; it overwrites the Phase 1 key file.
extract_sa_key "${OMNI_NAMESPACE}" "${ARTIFACTS}/omni-sa-key"
|
|
|
|
# Build a schematic with SideroLink kernel args pointing to the Helm-deployed Omni.
# The heredoc delimiter is unquoted on purpose: NODE_IP and JOIN_TOKEN are expanded.
HELM_SCHEMATIC=$(
  cat <<EOF
customization:
  extraKernelArgs:
    - siderolink.api=grpc://${NODE_IP}:30090?jointoken=${JOIN_TOKEN}
    - talos.events.sink=[fdae:41e4:649b:9303::1]:8091
    - talos.logging.kernel=tcp://[fdae:41e4:649b:9303::1]:8092
    - console=tty0
    - console=ttyS0
  systemExtensions:
    officialExtensions:
      - siderolabs/hello-world-service
EOF
)

# Register the schematic with the image factory.
# --fail turns an HTTP error into a non-zero exit status (propagated by
# pipefail) instead of piping an error body into jq.
HELM_SCHEMATIC_ID=$(curl -fsS -X POST --data-binary "${HELM_SCHEMATIC}" https://factory.talos.dev/schematics | jq -r '.id')

# jq prints "null" when the response has no "id" field.
if [[ -z "${HELM_SCHEMATIC_ID}" || "${HELM_SCHEMATIC_ID}" == "null" ]]; then
  echo "ERROR: image factory did not return a schematic ID" >&2
  exit 1
fi

echo "Schematic ID: ${HELM_SCHEMATIC_ID}"
|
|
|
|
# Boot one machine from the schematic built above so that it joins the
# Helm-deployed Omni; no partial machine config is used.
create_machines \
  name="${TEST_MACHINES_CLUSTER_NAME}" \
  count=1 \
  cidr=172.25.0.0/24 \
  secure_boot=false \
  uki=true \
  use_partial_config=false \
  talos_version="${TALOS_VERSION}" \
  kernel_args_schematic_id="${HELM_SCHEMATIC_ID}" \
  partial_config_schematic_id=""
|
|
|
|
# Command line of the integration test binary: it targets the Helm-deployed
# Omni instance, expects the single machine created above and runs only the
# CleanState and SingleNodeWorkloadProxy suites.
integration_test_opts=(
  --omni.endpoint=https://omni.example.org
  --omni.expected-machines=1
  --omni.talos-version="${TALOS_VERSION}"
  --omni.kubernetes-version="${KUBERNETES_VERSION}"
  --omni.omnictl-path="${ARTIFACTS}"/omnictl-linux-amd64
  --omni.output-dir="${TEST_OUTPUTS_DIR}"
  --omni.log-output="${TEST_OUTPUTS_DIR}/omni-helm-integration.log"
  --test.run "TestIntegration/Suites/(CleanState|SingleNodeWorkloadProxy)$"
  --test.v
)

# Credentials and the test CA directory are passed through the environment of this one command only.
OMNI_ENDPOINT=https://omni.example.org \
  OMNI_SERVICE_ACCOUNT_KEY=$(cat "${ARTIFACTS}/omni-sa-key") \
  SIDEROLINK_DEV_JOIN_TOKEN=${JOIN_TOKEN} \
  SSL_CERT_DIR=hack/test/helm/certs:/etc/ssl/certs \
  "${ARTIFACTS}"/integration-test-linux-amd64 "${integration_test_opts[@]}"
|