mirror of
https://github.com/hashicorp/vault.git
synced 2025-08-18 04:27:02 +02:00
Add an updated `target_ec2_instances` module that is capable of dynamically splitting target instances over subnet/az's that are compatible with the AMI architecture and the associated instance type for the architecture. Use the `target_ec2_instances` module where necessary. Ensure that `raft` storage scenarios don't provision unnecessary infrastructure with a new `target_ec2_shim` module. After a lot of trial, the state of EC2 spot instance capacity, their associated APIs, and current support for different fleet types in the AWS Terraform provider, have proven to make using spot instances for scenario targets too unreliable. The current state of each method: * `target_ec2_fleet`: unusable due to the fact that the `instant` type does not guarantee fulfillment of either `spot` or `on-demand` instance request types. The module does support both `on-demand` and `spot` request types and is capable of bidding across a maximum of four availability zones, which makes it an attractive choice if the `instant` type would always fulfill requests. Perhaps a `request` type with `wait_for_fulfillment` option like `aws_spot_fleet_request` would make it more viable for future consideration. * `target_ec2_spot_fleet`: more reliable if bidding for target instances that have capacity in the chosen zone. Issues in the AWS provider prevent us from bidding across multiple zones successfully. Over the last 2-3 months target capacity for the instance types we'd prefer to use has dropped dramatically and the price is near-or-at on-demand. The volatility for nearly no cost savings means we should put this option on the shelf for now. * `target_ec2_instances`: the most reliable method we've got. It is now capable of automatically determining which subnets and availability zones to provision targets in and has been updated to be usable for both Vault and Consul targets. By default we use the cheapest medium instance types that we've found are reliable to test vault. 
* Update .gitignore * enos/modules/create_vpc: create a subnet for every availability zone * enos/modules/target_ec2_fleet: bid across the maximum of four availability zones for targets * enos/modules/target_ec2_spot_fleet: attempt to make the spot fleet bid across more availability zones for targets * enos/modules/target_ec2_instances: create module to use ec2:RunInstances for scenario targets * enos/modules/target_ec2_shim: create shim module to satisfy the target module interface * enos/scenarios: use target_ec2_shim for backend targets on raft storage scenarios * enos/modules/az_finder: remove unused module Signed-off-by: Ryan Cragun <me@ryan.ec>
336 lines
9.3 KiB
HCL
336 lines
9.3 KiB
HCL
# Provider requirements for this module.
terraform {
  required_providers {
    # The enos provider is not yet published to the public registry, so every
    # module has to spell out its source explicitly.
    enos = {
      version = ">= 0.3.24"
      source  = "app.terraform.io/hashicorp-qti/enos"
    }
  }
}
|
|
|
|
# Look up the VPC identified by var.vpc_id. Its CIDR block associations are
# read by the target security group's ingress rules.
data "aws_vpc" "vpc" {
  id = var.vpc_id
}
|
|
|
|
# Enumerate the subnets that belong to var.vpc_id. The resulting IDs feed
# random_shuffle.subnets.
data "aws_subnets" "vpc" {
  filter {
    values = [var.vpc_id]
    name   = "vpc-id"
  }
}
|
|
|
|
# Look up the KMS key referenced by var.awskms_unseal_key_arn.
data "aws_kms_key" "kms_key" {
  key_id = var.awskms_unseal_key_arn
}
|
|
|
|
# Permissions policy attached to the target instance role
# (see aws_iam_role_policy.target).
data "aws_iam_policy_document" "target" {
  # Instance discovery and Secrets Manager access on all resources.
  statement {
    actions = [
      "ec2:DescribeInstances",
      "secretsmanager:*"
    ]

    resources = ["*"]
  }

  # KMS operations scoped to the unseal key only.
  # NOTE(review): kms:ListKeys presumably has no effect when scoped to a single
  # key ARN -- confirm against the AWS KMS permissions reference.
  statement {
    actions = [
      "kms:DescribeKey",
      "kms:ListKeys",
      "kms:Encrypt",
      "kms:Decrypt",
      "kms:GenerateDataKey"
    ]

    resources = [var.awskms_unseal_key_arn]
  }
}
|
|
|
|
# Trust policy that lets the EC2 service assume the target role.
data "aws_iam_policy_document" "target_role" {
  statement {
    principals {
      identifiers = ["ec2.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}
|
|
|
|
# Environment of the machine running Terraform. Its public_ip_addresses are
# allowed through the target security group's ingress rules.
data "enos_environment" "localhost" {
}
|
|
|
|
# Eight lowercase letters used as the cluster name when var.cluster_name is
# not provided (see local.cluster_name).
resource "random_string" "random_cluster_name" {
  length = 8

  # Lowercase letters only.
  lower   = true
  upper   = false
  numeric = false
  special = false
}
|
|
|
|
# Four lowercase letters appended to local.name_prefix.
resource "random_string" "unique_id" {
  length = 4

  # Lowercase letters only.
  lower   = true
  upper   = false
  numeric = false
  special = false
}
|
|
|
|
// ec2:CreateFleet only allows up to 4 InstanceRequirements overrides so we can only ever request
// a fleet across 4 or fewer subnets if we want to bid with InstanceRequirements instead of
// weighted instance types.
//
// The result count is capped at the number of subnets that actually exist in the VPC. When
// random_shuffle is asked for more results than it has inputs it repeats items, which would
// yield duplicate subnet overrides in the fleet's launch template config.
resource "random_shuffle" "subnets" {
  input        = data.aws_subnets.vpc.ids
  result_count = min(4, length(data.aws_subnets.vpc.ids))
}
|
|
|
|
# Values shared by the resources in this module.
locals {
  # Allocation strategies handed to the fleet's spot and on-demand options.
  spot_allocation_strategy      = "lowestPrice"
  on_demand_allocation_strategy = "lowestPrice"

  # One string key ("0", "1", ...) per requested instance; used as a for_each
  # set when looking up the fleet's instances.
  instances = toset([for idx in range(var.instance_count) : tostring(idx)])

  # Prefer the caller-supplied cluster name and fall back to the random one.
  cluster_name = coalesce(var.cluster_name, random_string.random_cluster_name.result)
  name_prefix  = "${var.project_name}-${local.cluster_name}-${random_string.unique_id.result}"

  # Tags applied to the fleet and to the instances it launches.
  fleet_tag = "${local.name_prefix}-spot-fleet-target"
  fleet_tags = {
    Name                  = "${local.name_prefix}-${var.cluster_tag_key}-target"
    (var.cluster_tag_key) = local.cluster_name
    Fleet                 = local.fleet_tag
  }
}
|
|
|
|
# IAM role for target instances; assumable per the target_role trust policy.
resource "aws_iam_role" "target" {
  assume_role_policy = data.aws_iam_policy_document.target_role.json
  name               = "${local.name_prefix}-target-role"
}
|
|
|
|
# Instance profile wrapping the target role; referenced by the launch template.
resource "aws_iam_instance_profile" "target" {
  role = aws_iam_role.target.name
  name = "${local.name_prefix}-target-profile"
}
|
|
|
|
# Attach the target permissions policy document to the target role.
resource "aws_iam_role_policy" "target" {
  role   = aws_iam_role.target.id
  name   = "${local.name_prefix}-target-policy"
  policy = data.aws_iam_policy_document.target.json
}
|
|
|
|
# Security group attached to every target instance.
#
# Each ingress rule allows the public IPs of the machine running Terraform
# (as /32s) plus every CIDR block associated with the VPC. The VPC CIDR blocks
# are passed as a list and flattened; joining them into one comma-separated
# string would produce an invalid CIDR whenever the VPC has more than one
# associated block.
resource "aws_security_group" "target" {
  name        = "${local.name_prefix}-target"
  description = "Target instance security group"
  vpc_id      = var.vpc_id

  # SSH traffic
  ingress {
    from_port = 22
    to_port   = 22
    protocol  = "tcp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  # Vault traffic
  ingress {
    from_port = 8200
    to_port   = 8201
    protocol  = "tcp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
      formatlist("%s/32", var.ssh_allow_ips)
    ])
  }

  # Consul traffic
  ingress {
    from_port = 8300
    to_port   = 8302
    protocol  = "tcp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  ingress {
    from_port = 8301
    to_port   = 8302
    protocol  = "udp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  ingress {
    from_port = 8500
    to_port   = 8503
    protocol  = "tcp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  ingress {
    from_port = 8600
    to_port   = 8600
    protocol  = "tcp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  ingress {
    from_port = 8600
    to_port   = 8600
    protocol  = "udp"
    cidr_blocks = flatten([
      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
      data.aws_vpc.vpc.cidr_block_associations[*].cidr_block,
    ])
  }

  # Internal traffic
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }

  # External traffic
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(
    var.common_tags,
    {
      Name = "${local.name_prefix}-sg"
    },
  )
}
|
|
|
|
# Launch template used by the EC2 fleet. Instead of naming an instance type it
# describes the acceptable memory and vCPU ranges via instance_requirements.
resource "aws_launch_template" "target" {
  name     = "${local.name_prefix}-target"
  key_name = var.ssh_keypair
  image_id = var.ami_id

  iam_instance_profile {
    name = aws_iam_instance_profile.target.name
  }

  # Bounds come from the instance_mem_* and instance_cpu_* variables.
  instance_requirements {
    burstable_performance = "included"

    vcpu_count {
      min = var.instance_cpu_min
      max = var.instance_cpu_max
    }

    memory_mib {
      min = var.instance_mem_min
      max = var.instance_mem_max
    }
  }

  network_interfaces {
    security_groups             = [aws_security_group.target.id]
    associate_public_ip_address = true
    delete_on_termination       = true
  }

  # Tag launched instances with the common tags plus the fleet tags.
  tag_specifications {
    resource_type = "instance"

    tags = merge(
      var.common_tags,
      local.fleet_tags,
    )
  }
}
|
|
|
|
# There are three primary knobs we can turn to try and optimize our costs by
# using a spot fleet: our min and max instance requirements, our max bid
# price, and the allocation strategy to use when fulfilling the spot request.
# We've currently configured our instance requirements to allow for anywhere
# from 2-4 vCPUs and 4-16GB of RAM. We intentionally have a wide range
# to allow for a large instance size pool to be considered. Our next knob is our
# max bid price. As we're using spot fleets to save on instance cost, we never
# want to pay more for an instance than we would on-demand. We've set the max price
# to equal what we pay for t3.medium instances on-demand, which are the smallest
# reliable size for Vault scenarios. The final knob is the allocation strategy
# that AWS will use when looking for instances that meet our resource and cost
# requirements. We're using the "lowestPrice" strategy to get the absolute
# cheapest machines that will fit the requirements, but it comes with a slightly
# higher capacity risk than say, "capacityOptimized" or "priceCapacityOptimized".
# Unless we see capacity issues or instances being shut down then we ought to
# stick with that strategy.
resource "aws_ec2_fleet" "targets" {
  type = "instant" // make a synchronous request for the entire fleet

  terminate_instances                 = true // terminate instances when we "delete" the fleet
  terminate_instances_with_expiration = false
  replace_unhealthy_instances         = false

  tags = merge(
    var.common_tags,
    local.fleet_tags,
  )

  launch_template_config {
    launch_template_specification {
      version            = aws_launch_template.target.latest_version
      launch_template_id = aws_launch_template.target.id
    }

    // One override per shuffled subnet.
    dynamic "override" {
      for_each = random_shuffle.subnets.result

      content {
        subnet_id = override.value
      }
    }
  }

  on_demand_options {
    allocation_strategy = local.on_demand_allocation_strategy
    max_total_price     = (var.max_price * var.instance_count)
    min_target_capacity = var.capacity_type == "on-demand" ? var.instance_count : null
    // One of these has to be set to enforce our on-demand target capacity minimum
    single_instance_type     = true
    single_availability_zone = false
  }

  spot_options {
    allocation_strategy = local.spot_allocation_strategy
    // The instance_pools_to_use_count is only valid for the allocation_strategy
    // lowestPrice. When we are using that strategy we always set it to a
    // non-zero value to avoid rebuilding the fleet on a re-run. For any other
    // strategy we leave it unset (null), also to avoid rebuilding the fleet on
    // a re-run.
    instance_pools_to_use_count = local.spot_allocation_strategy == "lowestPrice" ? 1 : null
  }

  // Request the whole target capacity as the configured capacity type: all
  // spot when var.capacity_type is "spot", all on-demand when it is
  // "on-demand". The other type's target capacity is zero.
  target_capacity_specification {
    default_target_capacity_type = var.capacity_type
    total_target_capacity        = var.instance_count
    target_capacity_unit_type    = "units" // units == instance count
    on_demand_target_capacity    = var.capacity_type == "on-demand" ? var.instance_count : 0
    spot_target_capacity         = var.capacity_type == "spot" ? var.instance_count : 0
  }
}
|
|
|
|
# Look up each instance launched by the fleet, one per key in local.instances.
# The key indexes into the instance IDs of the fleet's first instance set.
data "aws_instance" "targets" {
  for_each = local.instances

  depends_on = [
    aws_ec2_fleet.targets,
  ]

  instance_id = aws_ec2_fleet.targets.fleet_instance_set[0].instance_ids[each.key]
}
|