From 79ca1a3dfb485fc5180bda38ab58a2d4c595a6aa Mon Sep 17 00:00:00 2001
From: Noel Georgi
Date: Thu, 3 Aug 2023 20:50:54 +0530
Subject: [PATCH] feat: e2e-aws using tf code

Run the AWS e2e tests against a cluster provisioned by the Terraform code
in siderolabs/contrib instead of CAPI: a prepare step registers a uniquely
named AMI and uploads the generated tfvars to the artifacts bucket,
downstream tf-apply/tf-destroy pipelines bring the cluster up and tear it
down again on success or failure, and the test step consumes the
talosconfig/kubeconfig published by tf-apply. The cloud-image-uploader
gains --target-clouds and --name-prefix flags, with extra arguments passed
through via CLOUD_IMAGES_EXTRA_ARGS.

Signed-off-by: Noel Georgi
---
 .drone.jsonnet                       |  69 +++++++++++++++--
 hack/cloud-image-uploader.sh         |   2 +-
 hack/cloud-image-uploader/aws.go     |   4 +
 hack/cloud-image-uploader/main.go    |  37 ++++++----
 hack/cloud-image-uploader/options.go |   3 +
 hack/test/e2e-aws-prepare.sh         |  16 ++++
 hack/test/e2e-aws.sh                 | 106 +++++++++------------------
 hack/test/tfvars/aws.jq              |  15 ++++
 8 files changed, 161 insertions(+), 91 deletions(-)
 create mode 100755 hack/test/e2e-aws-prepare.sh
 create mode 100644 hack/test/tfvars/aws.jq

diff --git a/.drone.jsonnet b/.drone.jsonnet
index e503a39a6..4c4c50246 100644
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@@ -131,7 +131,7 @@ local Step(name, image='', target='', privileged=false, depends_on=[], environme
 
 // TriggerDownstream is a helper function for creating a step that triggers a
 // downstream pipeline. It is used to standardize the creation of these steps.
-local TriggerDownstream(name, target, repositories, image='', params=[], depends_on=[]) = {
+local TriggerDownstream(name, target, repositories, image='', params=[], depends_on=[], when={}) = {
   name: name,
   image: if image == '' then downstream_image else image,
   settings: {
@@ -146,6 +146,7 @@ local TriggerDownstream(name, target, repositories, image='', params=[], depends
     deploy: target,
   },
   depends_on: [x.name for x in depends_on],
+  when: when,
 };
 
 // This provides the docker service.
@@ -326,7 +327,7 @@ local load_artifacts = Step(
   extra_commands=[
     'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
     'mkdir -p _out/',
-    'az storage blob download-batch --overwrite true -d _out -s ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
+    'az storage blob download-batch --overwrite -d _out -s ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
    'chmod +x _out/clusterctl _out/integration-test-linux-amd64 _out/module-sig-verify-linux-amd64 _out/kubectl _out/kubestr _out/helm _out/cilium _out/talosctl*',
   ]
 );
@@ -340,7 +341,7 @@ local extensions_build = TriggerDownstream(
     std.format('REGISTRY=%s', local_registry),
     'PLATFORM=linux/amd64',
     'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
-    '_out/talos-metadata',
+    '_out/talos-metadata',  // params passed from file with KEY=VALUE format
   ],
   depends_on=[load_artifacts],
 );
@@ -642,7 +643,65 @@ local capi_docker = Step('e2e-docker', depends_on=[load_artifacts], target='e2e-
   INTEGRATION_TEST_RUN: 'XXX',
 });
 local e2e_capi = Step('e2e-capi', depends_on=[capi_docker], environment=creds_env_vars);
-local e2e_aws = Step('e2e-aws', depends_on=[e2e_capi], environment=creds_env_vars);
+
+local e2e_aws_prepare = Step(
+  'cloud-images',
+  depends_on=[
+    load_artifacts,
+  ],
+  environment=creds_env_vars {
+    CLOUD_IMAGES_EXTRA_ARGS: '--name-prefix talos-e2e --target-clouds aws --architectures amd64 --aws-regions us-east-1',
+  },
+  extra_commands=[
+    'make e2e-aws-prepare',
+    'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
+    'az storage blob upload-batch --overwrite -s _out --pattern "e2e-aws-generated/*" -d "${CI_COMMIT_SHA}${DRONE_TAG//./-}"',
+  ]
+);
+
+local tf_apply = TriggerDownstream(
+  'tf-apply',
+  'e2e-talos-tf-apply',
+  ['siderolabs/contrib@main'],
+  params=[
+    'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
+    'TYPE=aws',
+    'AWS_DEFAULT_REGION=us-east-1',
+  ],
+  depends_on=[e2e_aws_prepare],
+);
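The `tf-apply` trigger only passes parameters; the provisioning itself lives in the downstream `e2e-talos-tf-apply` pipeline in `siderolabs/contrib`. A rough sketch of the contract this patch relies on follows: the container name (`BUCKET_PATH`), the `e2e-aws-generated/vars.json` blob uploaded above, and the `e2e-aws-talosconfig`/`e2e-aws-kubeconfig` blobs downloaded by the step defined next are fixed by this patch, while the Terraform directory layout and output names are assumptions about the contrib repo:

```bash
#!/usr/bin/env bash
# Hypothetical outline of the downstream tf-apply job (illustrative only;
# the real implementation lives in siderolabs/contrib).
set -euo pipefail

BUCKET_PATH="${BUCKET_PATH:?}"   # passed as a param: ${CI_COMMIT_SHA}${DRONE_TAG//./-}
TYPE="${TYPE:?}"                 # "aws" for this pipeline

# Fetch the tfvars generated by `make e2e-aws-prepare`.
az storage blob download -c "${BUCKET_PATH}" -n "e2e-${TYPE}-generated/vars.json" -f vars.json

# Apply the Terraform code for the target cloud (path and output names assumed).
terraform -chdir="examples/terraform/${TYPE}" init
terraform -chdir="examples/terraform/${TYPE}" apply -auto-approve -var-file="$(pwd)/vars.json"
terraform -chdir="examples/terraform/${TYPE}" output -raw talosconfig > "e2e-${TYPE}-talosconfig"
terraform -chdir="examples/terraform/${TYPE}" output -raw kubeconfig > "e2e-${TYPE}-kubeconfig"

# Publish credentials under the blob names the download step below expects.
az storage blob upload --overwrite -c "${BUCKET_PATH}" -f "e2e-${TYPE}-talosconfig" -n "e2e-${TYPE}-talosconfig"
az storage blob upload --overwrite -c "${BUCKET_PATH}" -f "e2e-${TYPE}-kubeconfig" -n "e2e-${TYPE}-kubeconfig"
```

The matching `tf-destroy` trigger defined below uses the new `when` parameter so teardown runs whether `e2e-aws` passes or fails.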
+
+local e2e_aws_tf_apply_post = Step(
+  'e2e-aws-download-artifacts',
+  with_make=false,
+  environment=creds_env_vars,
+  extra_commands=[
+    'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
+    'az storage blob download -f _out/e2e-aws-talosconfig -n e2e-aws-talosconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
+    'az storage blob download -f _out/e2e-aws-kubeconfig -n e2e-aws-kubeconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
+  ],
+  depends_on=[tf_apply],
+);
+
+local e2e_aws = Step('e2e-aws', depends_on=[e2e_aws_tf_apply_post], environment=creds_env_vars);
+
+local tf_destroy = TriggerDownstream(
+  'tf-destroy',
+  'e2e-talos-tf-destroy',
+  ['siderolabs/contrib@main'],
+  params=[
+    'TYPE=aws',
+    'AWS_DEFAULT_REGION=us-east-1',
+  ],
+  depends_on=[e2e_aws],
+  when={
+    status: [
+      'failure',
+      'success',
+    ],
+  },
+);
+
 local e2e_azure = Step('e2e-azure', depends_on=[e2e_capi], environment=creds_env_vars);
 local e2e_gcp = Step('e2e-gcp', depends_on=[e2e_capi], environment=creds_env_vars);
 
@@ -656,7 +715,7 @@ local e2e_trigger(names) = {
 
 local e2e_pipelines = [
   // regular pipelines, triggered on promote events
-  Pipeline('e2e-aws', default_pipeline_steps + [capi_docker, e2e_capi, e2e_aws]) + e2e_trigger(['e2e-aws']),
+  Pipeline('e2e-aws', default_pipeline_steps + [e2e_aws_prepare, tf_apply, e2e_aws_tf_apply_post, e2e_aws, tf_destroy]) + e2e_trigger(['e2e-aws']),
   Pipeline('e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp]) + e2e_trigger(['e2e-gcp']),
 
   // cron pipelines, triggered on schedule events
diff --git a/hack/cloud-image-uploader.sh b/hack/cloud-image-uploader.sh
index e13f6c76b..1ef91f115 100755
--- a/hack/cloud-image-uploader.sh
+++ b/hack/cloud-image-uploader.sh
@@ -4,4 +4,4 @@ set -e
 
 cd hack/cloud-image-uploader
 
-go run . --artifacts-path="../../${ARTIFACTS}" --tag="${TAG}" --abbrev-tag="${ABBREV_TAG}"
+go run . --artifacts-path="../../${ARTIFACTS}" --tag="${TAG}" --abbrev-tag="${ABBREV_TAG}" "$@"
diff --git a/hack/cloud-image-uploader/aws.go b/hack/cloud-image-uploader/aws.go
index c67b7a366..4f66fb153 100644
--- a/hack/cloud-image-uploader/aws.go
+++ b/hack/cloud-image-uploader/aws.go
@@ -253,6 +253,10 @@ func (au *AWSUploader) registerAMIArch(ctx context.Context, region string, svc *
 
 	imageName := fmt.Sprintf("talos-%s-%s-%s", au.Options.Tag, region, arch)
 
+	if au.Options.NamePrefix != "" {
+		imageName = fmt.Sprintf("%s-%s-%s-%s", au.Options.NamePrefix, au.Options.Tag, region, arch)
+	}
+
 	imageResp, err := svc.DescribeImagesWithContext(ctx, &ec2.DescribeImagesInput{
 		Filters: []*ec2.Filter{
 			{
diff --git a/hack/cloud-image-uploader/main.go b/hack/cloud-image-uploader/main.go
index 662d7c292..e530d34fe 100644
--- a/hack/cloud-image-uploader/main.go
+++ b/hack/cloud-image-uploader/main.go
@@ -61,10 +61,12 @@ func run() error {
 		log.Printf("failed to get a list of enabled AWS regions: %s, ignored", err)
 	}
 
+	pflag.StringSliceVar(&DefaultOptions.TargetClouds, "target-clouds", DefaultOptions.TargetClouds, "cloud targets to upload to")
 	pflag.StringSliceVar(&DefaultOptions.Architectures, "architectures", DefaultOptions.Architectures, "list of architectures to process")
 	pflag.StringVar(&DefaultOptions.ArtifactsPath, "artifacts-path", DefaultOptions.ArtifactsPath, "artifacts path")
 	pflag.StringVar(&DefaultOptions.Tag, "tag", DefaultOptions.Tag, "tag (version) of the uploaded image")
 	pflag.StringVar(&DefaultOptions.AzureAbbrevTag, "abbrev-tag", DefaultOptions.AzureAbbrevTag, "abbreviated tag (version) of the uploaded image")
+	pflag.StringVar(&DefaultOptions.NamePrefix, "name-prefix", DefaultOptions.NamePrefix, "prefix for the name of the uploaded image")
 
 	pflag.StringSliceVar(&DefaultOptions.AWSRegions, "aws-regions", DefaultOptions.AWSRegions, "list of AWS regions to upload to")
 
@@ -84,21 +86,28 @@ func run() error {
 
 	g, ctx = errgroup.WithContext(ctx)
 
-	g.Go(func() error {
-		aws := AWSUploader{
-			Options: DefaultOptions,
+	for _, target := range DefaultOptions.TargetClouds {
+		switch target {
+		case "aws":
+			g.Go(func() error {
+				aws := AWSUploader{
+					Options: DefaultOptions,
+				}
+
+				return aws.Upload(ctx)
+			})
+		case "azure":
+			g.Go(func() error {
+				azure := AzureUploader{
+					Options: DefaultOptions,
+				}
+
+				return azure.AzureGalleryUpload(ctx)
+			})
+		default:
+			return fmt.Errorf("unknown target: %s", target)
 		}
-
-		return aws.Upload(ctx)
-	})
-
-	g.Go(func() error {
-		azure := AzureUploader{
-			Options: DefaultOptions,
-		}
-
-		return azure.AzureGalleryUpload(ctx)
-	})
+	}
 
 	if err = g.Wait(); err != nil {
 		return fmt.Errorf("failed: %w", err)
diff --git a/hack/cloud-image-uploader/options.go b/hack/cloud-image-uploader/options.go
index 8507d7ac3..8ee2cb720 100644
--- a/hack/cloud-image-uploader/options.go
+++ b/hack/cloud-image-uploader/options.go
@@ -13,7 +13,9 @@ import (
 type Options struct {
 	Tag           string
 	ArtifactsPath string
+	NamePrefix    string
 	Architectures []string
+	TargetClouds  []string
 
 	// AWS options.
 	AWSRegions []string
@@ -36,6 +38,7 @@ type Location struct {
 var DefaultOptions = Options{
 	ArtifactsPath: "_out/",
 	Architectures: []string{"amd64", "arm64"},
+	TargetClouds:  []string{"aws", "azure"},
 }
 
 // AWSImage returns path to AWS pre-built image.
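With `--target-clouds` and `--name-prefix` in place, the `cloud-images` step's `CLOUD_IMAGES_EXTRA_ARGS` reach the Go binary through the `"$@"` pass-through added to `hack/cloud-image-uploader.sh`; how the Makefile forwards the variable is assumed here. Roughly, for an e2e build:

```bash
# Illustrative invocation (env values made up; CI sets them via the Makefile):
ARTIFACTS=_out TAG=v1.5.0-alpha.3 ABBREV_TAG=latest \
  ./hack/cloud-image-uploader.sh \
    --name-prefix talos-e2e \
    --target-clouds aws \
    --architectures amd64 \
    --aws-regions us-east-1

# Per the aws.go change above, the AMI is registered as
# "<name-prefix>-<tag>-<region>-<arch>", e.g. talos-e2e-v1.5.0-alpha.3-us-east-1-amd64.
# The e2e-aws-prepare.sh script introduced below expects ${ARTIFACTS}/cloud-images.json
# to contain entries shaped like (id made up):
#   [{ "region": "us-east-1", "arch": "amd64", "id": "ami-0123456789abcdef0" }]
```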
diff --git a/hack/test/e2e-aws-prepare.sh b/hack/test/e2e-aws-prepare.sh
new file mode 100755
index 000000000..8d30765d1
--- /dev/null
+++ b/hack/test/e2e-aws-prepare.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+set -eou pipefail
+
+source ./hack/test/e2e.sh
+
+REGION="us-east-1"
+
+AMI_ID=$(jq -r ".[] | select(.region == \"${REGION}\") | select(.arch == \"amd64\") | .id" "${ARTIFACTS}/cloud-images.json")
+
+mkdir -p "${ARTIFACTS}/e2e-aws-generated"
+
+NAME_PREFIX="talos-e2e-${SHA}-aws"
+
+jq --null-input --arg AMI_ID "${AMI_ID}" --arg CLUSTER_NAME "${NAME_PREFIX}" --arg KUBERNETES_VERSION "${KUBERNETES_VERSION}" '{ami_id: $AMI_ID, cluster_name: $CLUSTER_NAME, kubernetes_version: $KUBERNETES_VERSION}' \
+  | jq -f hack/test/tfvars/aws.jq > "${ARTIFACTS}/e2e-aws-generated/vars.json"
diff --git a/hack/test/e2e-aws.sh b/hack/test/e2e-aws.sh
index 5c74728cb..8e663c745 100755
--- a/hack/test/e2e-aws.sh
+++ b/hack/test/e2e-aws.sh
@@ -4,85 +4,49 @@ set -eou pipefail
 
 source ./hack/test/e2e.sh
 
-REGION="us-east-1"
-BUCKET="talos-ci-e2e"
+cp "${ARTIFACTS}/e2e-aws-talosconfig" "${TALOSCONFIG}"
+cp "${ARTIFACTS}/e2e-aws-kubeconfig" "${KUBECONFIG}"
 
-function setup {
-  # Setup svc account
-  mkdir -p ${TMP}
+# Wait for nodes to check in
+timeout=$(($(date +%s) + TIMEOUT))
+until ${KUBECTL} get nodes -o go-template='{{ len .items }}' | grep ${NUM_NODES} >/dev/null; do
+  [[ $(date +%s) -gt $timeout ]] && exit 1
+  ${KUBECTL} get nodes -o wide && :
+  sleep 10
+done
 
-  # Uncompress image
-  xz -d < ${ARTIFACTS}/aws-amd64.raw.xz > ${TMP}/disk.raw
+# Wait for nodes to be ready
+timeout=$(($(date +%s) + TIMEOUT))
+until ${KUBECTL} wait --timeout=1s --for=condition=ready=true --all nodes > /dev/null; do
+  [[ $(date +%s) -gt $timeout ]] && exit 1
+  ${KUBECTL} get nodes -o wide && :
+  sleep 10
+done
 
-  # Upload Image
-  echo "uploading image to s3"
-  aws s3 cp --quiet ${TMP}/disk.raw s3://${BUCKET}/aws-${TAG}.raw
+# Verify that we have an HA controlplane
+timeout=$(($(date +%s) + TIMEOUT))
+until ${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' -o go-template='{{ len .items }}' | grep 3 > /dev/null; do
+  [[ $(date +%s) -gt $timeout ]] && exit 1
+  ${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' && :
+  sleep 10
+done
 
-  # Create snapshot from image
-  echo "importing snapshot from s3"
-  import_task_id=$(aws ec2 import-snapshot --region ${REGION} --description "talos e2e ${TAG}" --disk-container "Format=raw,UserBucket={S3Bucket=${BUCKET},S3Key=aws-${TAG}.raw}" | jq -r '.ImportTaskId')
-  echo ${import_task_id}
+CONTROLPLANE0_NODE_NAME=$(${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' -o jsonpath='{.items[0].metadata.name}')
 
-  # Wait for import to complete
-  echo "waiting for snapshot import to complete"
-  snapshot_status=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
-    jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.Status')
-  while [ ${snapshot_status} != "completed" ]; do
-    sleep 5
-    snapshot_status=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
-      jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.Status')
-  done
-  snapshot_id=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
-    jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.SnapshotId')
-  echo ${snapshot_id}
+# Wait until we have an IP for the first controlplane node
+timeout=$(($(date +%s) + TIMEOUT))
+until [ -n "$(${KUBECTL} get nodes "${CONTROLPLANE0_NODE_NAME}" -o go-template --template='{{range .status.addresses}}{{if eq .type "ExternalIP"}}{{.address}}{{end}}{{end}}')" ]; do
+  [[ $(date +%s) -gt $timeout ]] && exit 1
+  sleep 10
+done
 
-  # Create AMI
-  image_id=$(aws ec2 describe-images --region ${REGION} --filters="Name=name,Values=talos-e2e-${TAG}" | jq -r '.Images[0].ImageId') || true
-  if [[ ${image_id} != "null" ]]; then
-    aws ec2 deregister-image --region ${REGION} --image-id ${image_id}
-  fi
+# let's get the IP of the first controlplane node
+CONTROLPLANE0_NODE=$(${KUBECTL} get nodes "${CONTROLPLANE0_NODE_NAME}" -o go-template --template='{{range .status.addresses}}{{if eq .type "ExternalIP"}}{{.address}}{{end}}{{end}}')
 
-  ami=$(aws ec2 register-image --region ${REGION} \
-    --block-device-mappings "DeviceName=/dev/xvda,VirtualName=talostest,Ebs={DeleteOnTermination=true,SnapshotId=${snapshot_id},VolumeSize=20,VolumeType=gp2}" \
-    --root-device-name /dev/xvda \
-    --virtualization-type hvm \
-    --architecture x86_64 \
-    --ena-support \
-    --name talos-e2e-${TAG} | jq -r '.ImageId')
+# set the talosconfig to use the first controlplane IP
+${TALOSCTL} config endpoint "${CONTROLPLANE0_NODE}"
+${TALOSCTL} config node "${CONTROLPLANE0_NODE}"
 
-  ## Cluster-wide vars
-  export CLUSTER_NAME=${NAME_PREFIX}
-  export AWS_REGION=us-east-1
-  export AWS_SSH_KEY_NAME=talos-e2e
-  export AWS_VPC_ID=vpc-ff5c5687
-  export AWS_SUBNET=subnet-c4e9b3a0
-  export AWS_SUBNET_AZ=us-east-1a
-  export CALICO_VERSION=v3.24.1
-  export AWS_CLOUD_PROVIDER_VERSION=v1.20.0-alpha.0
-
-  ## Control plane vars
-  export CONTROL_PLANE_MACHINE_COUNT=3
-  export AWS_CONTROL_PLANE_MACHINE_TYPE=t3.large
-  export AWS_CONTROL_PLANE_VOL_SIZE=50
-  export AWS_CONTROL_PLANE_AMI_ID=${ami}
-  export AWS_CONTROL_PLANE_ADDL_SEC_GROUPS='[{id: sg-ebe8e59f}]'
-  export AWS_CONTROL_PLANE_IAM_PROFILE=CAPI_AWS_ControlPlane
-
-  ## Worker vars
-  export WORKER_MACHINE_COUNT=3
-  export AWS_NODE_MACHINE_TYPE=t3.large
-  export AWS_NODE_VOL_SIZE=50
-  export AWS_NODE_AMI_ID=${ami}
-  export AWS_NODE_ADDL_SEC_GROUPS='[{id: sg-ebe8e59f}]'
-  export AWS_NODE_IAM_PROFILE=CAPI_AWS_Worker
-
-  ${CLUSTERCTL} generate cluster ${NAME_PREFIX} \
-    --kubeconfig /tmp/e2e/docker/kubeconfig \
-    --from https://github.com/siderolabs/cluster-api-templates/blob/main/aws/standard/standard.yaml > ${TMP}/cluster.yaml
-}
-
-setup
-create_cluster_capi aws
 run_talos_integration_test
 run_kubernetes_integration_test
diff --git a/hack/test/tfvars/aws.jq b/hack/test/tfvars/aws.jq
new file mode 100644
index 000000000..fac9ad46e
--- /dev/null
+++ b/hack/test/tfvars/aws.jq
@@ -0,0 +1,15 @@
+{
+  "cluster_name": .cluster_name,
+  "num_control_planes": 3,
+  "num_workers": 3,
+  "ami_id": .ami_id,
+  "ccm": true,
+  "kubernetes_version": .kubernetes_version,
+  "instance_type_control_plane": "t3.large",
+  "instance_type_worker": "t3.large",
+  "extra_tags": {
+    "Name": .cluster_name,
+    "Project": "talos-e2e-ci",
+    "Environment": "ci"
+  }
+}
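To see the two-stage jq pipeline from `e2e-aws-prepare.sh` end to end: the first `jq --null-input` builds a small object from the CI inputs, and `aws.jq` merges in the fixed cluster shape. With made-up values:

```bash
jq --null-input \
  --arg AMI_ID "ami-0123456789abcdef0" \
  --arg CLUSTER_NAME "talos-e2e-abc1234-aws" \
  --arg KUBERNETES_VERSION "1.28.0" \
  '{ami_id: $AMI_ID, cluster_name: $CLUSTER_NAME, kubernetes_version: $KUBERNETES_VERSION}' \
  | jq -f hack/test/tfvars/aws.jq
# => {
#      "cluster_name": "talos-e2e-abc1234-aws",
#      "num_control_planes": 3,
#      "num_workers": 3,
#      "ami_id": "ami-0123456789abcdef0",
#      "ccm": true,
#      "kubernetes_version": "1.28.0",
#      "instance_type_control_plane": "t3.large",
#      "instance_type_worker": "t3.large",
#      "extra_tags": {
#        "Name": "talos-e2e-abc1234-aws",
#        "Project": "talos-e2e-ci",
#        "Environment": "ci"
#      }
#    }
```

The `num_control_planes: 3` baked in here is what the new `e2e-aws.sh` asserts when it waits for exactly three control-plane nodes.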