mirror of
https://github.com/apricote/hcloud-upload-image.git
synced 2025-08-06 22:06:58 +02:00
The base image used requires ~0.42Gi. Even if the uploaded image is smaller, those bytes are currently not overwritten and still part of the stored snapshot. By zeroing the root disk first, those unwanted bytes are removed and not stored with the snapshot. This has two benefits: 1. Snapshots are billed by their compressed (shown) size, so small images are now a bit cheaper. 2. The time it takes to create a server from the snapshot scales with the snapshot size, so smaller snapshots mean the server can start more quickly. This reduces the size of an example Talos x86 image from 0.42Gi before, to 0.2Gi afterwards. An example Flatcar image was 0.47Gi before, and still has that size with this patch. There are two ways to zero out the disk: - `dd if=/dev/zero of=/dev/sda` actually writes zeroes to every block on the device. This takes around a minute to do. - `blkdiscard /dev/sda` talks to the disk directly and instructs it to discard all blocks. This only takes around 5 seconds. As both have the same effect on image size, but `blkdiscard` is SO MUCH faster, I have decided to use it. Even though only small images benefit from this, this is now enabled by default as the downside (5 second slower upload) does not justify additional flags or options to enable/disable this. Closes #96
548 lines
17 KiB
Go
548 lines
17 KiB
Go
package hcloudimages
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/url"
|
|
"time"
|
|
|
|
"github.com/hetznercloud/hcloud-go/v2/hcloud"
|
|
"github.com/hetznercloud/hcloud-go/v2/hcloud/exp/kit/sshutil"
|
|
"golang.org/x/crypto/ssh"
|
|
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/contextlogger"
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/actionutil"
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/control"
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/labelutil"
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/randomid"
|
|
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/sshsession"
|
|
)
|
|
|
|
const (
	// CreatedByLabel is the key of the label that marks resources created by this library.
	CreatedByLabel = "apricote.de/created-by"
	// CreatedByValue is the value stored under [CreatedByLabel].
	CreatedByValue = "hcloud-upload-image"

	// resourcePrefix is prepended to the random run ID to build the name of the
	// temporary SSH key and server.
	resourcePrefix = "hcloud-upload-image-"
)
|
|
|
|
var (
|
|
DefaultLabels = map[string]string{
|
|
CreatedByLabel: CreatedByValue,
|
|
}
|
|
|
|
serverTypePerArchitecture = map[hcloud.Architecture]*hcloud.ServerType{
|
|
hcloud.ArchitectureX86: {Name: "cx22"},
|
|
hcloud.ArchitectureARM: {Name: "cax11"},
|
|
}
|
|
|
|
defaultImage = &hcloud.Image{Name: "ubuntu-24.04"}
|
|
defaultLocation = &hcloud.Location{Name: "fsn1"}
|
|
defaultRescueType = hcloud.ServerRescueTypeLinux64
|
|
|
|
defaultSSHDialTimeout = 1 * time.Minute
|
|
|
|
// Size observed on x86, 2025-05-03, no idea if that changes.
|
|
// Might be able to extends this to more of the available memory.
|
|
rescueSystemRootDiskSizeMB int64 = 960
|
|
)
|
|
|
|
type UploadOptions struct {
|
|
// ImageURL must be publicly available. The instance will download the image from this endpoint.
|
|
ImageURL *url.URL
|
|
|
|
// ImageReader
|
|
ImageReader io.Reader
|
|
|
|
// ImageCompression describes the compression of the referenced image file. It defaults to [CompressionNone]. If
|
|
// set to anything else, the file will be decompressed before written to the disk.
|
|
ImageCompression Compression
|
|
|
|
ImageFormat Format
|
|
|
|
// Can be optionally set to make the client validate that the image can be written to the server.
|
|
ImageSize int64
|
|
|
|
// Possible future additions:
|
|
// ImageSignatureVerification
|
|
// ImageLocalPath
|
|
|
|
// Architecture should match the architecture of the Image. This decides if the Snapshot can later be
|
|
// used with [hcloud.ArchitectureX86] or [hcloud.ArchitectureARM] servers.
|
|
//
|
|
// Internally this decides what server type is used for the temporary server.
|
|
//
|
|
// Optional if [UploadOptions.ServerType] is set.
|
|
Architecture hcloud.Architecture
|
|
|
|
// ServerType can be optionally set to override the default server type for the architecture.
|
|
// Situations where this makes sense:
|
|
//
|
|
// - Your image is larger than the root disk of the default server types.
|
|
// - The default server type is no longer available, or not temporarily out of stock.
|
|
ServerType *hcloud.ServerType
|
|
|
|
// Description is an optional description that the resulting image (snapshot) will have. There is no way to
|
|
// select images by its description, you should use Labels if you need to identify your image later.
|
|
Description *string
|
|
|
|
// Labels will be added to the resulting image (snapshot). Use these to filter the image list if you
|
|
// need to identify the image later on.
|
|
//
|
|
// We also always add a label `apricote.de/created-by=hcloud-image-upload` ([CreatedByLabel], [CreatedByValue]).
|
|
Labels map[string]string
|
|
|
|
// DebugSkipResourceCleanup will skip the cleanup of the temporary SSH Key and Server.
|
|
DebugSkipResourceCleanup bool
|
|
}
|
|
|
|
// Compression describes how the image file is compressed. The image is decompressed on the
// temporary server before it is written to the disk.
type Compression string

const (
	// CompressionNone means the image is not compressed. This is the default.
	CompressionNone Compression = ""
	// CompressionBZ2 means the image is compressed with bzip2, it is decompressed with `bzip2 -cd`.
	CompressionBZ2 Compression = "bz2"
	// CompressionXZ means the image is compressed with xz, it is decompressed with `xz -cd`.
	CompressionXZ Compression = "xz"

	// Possible future additions:
	// zip,zstd
)
|
|
|
|
// Format describes the format of the (decompressed) image file.
type Format string

const (
	// FormatRaw is a raw disk image that is written to the disk as-is. This is the default.
	FormatRaw Format = ""

	// FormatQCOW2 allows to upload images in the qcow2 format directly.
	//
	// The qcow2 image must fit on the disk available in the rescue system. "qemu-img dd", which is used to convert
	// qcow2 to raw, requires a file as an input. If [UploadOptions.ImageSize] is set and FormatQCOW2 is used, there is a
	// warning message displayed if there is a high probability of issues.
	FormatQCOW2 Format = "qcow2"
)
|
|
|
|
// NewClient instantiates a new client. It requires a working [*hcloud.Client] to interact with the Hetzner Cloud API.
|
|
func NewClient(c *hcloud.Client) *Client {
|
|
return &Client{
|
|
c: c,
|
|
}
|
|
}
|
|
|
|
type Client struct {
|
|
c *hcloud.Client
|
|
}
|
|
|
|
// Upload the specified image into a snapshot on Hetzner Cloud.
|
|
//
|
|
// As the Hetzner Cloud API has no direct way to upload images, we create a temporary server,
|
|
// overwrite the root disk and take a snapshot of that disk instead.
|
|
//
|
|
// The temporary server costs money. If the upload fails, we might be unable to delete the server. Check out
|
|
// CleanupTempResources for a helper in this case.
|
|
func (s *Client) Upload(ctx context.Context, options UploadOptions) (*hcloud.Image, error) {
|
|
logger := contextlogger.From(ctx).With(
|
|
"library", "hcloudimages",
|
|
"method", "upload",
|
|
)
|
|
|
|
id, err := randomid.Generate()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
logger = logger.With("run-id", id)
|
|
// For simplicity, we use the name random name for SSH Key + Server
|
|
resourceName := resourcePrefix + id
|
|
labels := labelutil.Merge(DefaultLabels, options.Labels)
|
|
|
|
// 0. Validations
|
|
if options.ImageFormat == FormatQCOW2 && options.ImageSize > 0 {
|
|
if options.ImageSize > rescueSystemRootDiskSizeMB*1024*1024 {
|
|
// Just a warning, because the size might change with time.
|
|
// Alternatively one could add an override flag for the check and make this an error.
|
|
logger.WarnContext(ctx,
|
|
fmt.Sprintf("image must be smaller than %d MB (rescue system root disk) for qcow2", rescueSystemRootDiskSizeMB),
|
|
"maximum-size", rescueSystemRootDiskSizeMB,
|
|
"actual-size", options.ImageSize/(1024*1024),
|
|
)
|
|
}
|
|
}
|
|
|
|
// 1. Create SSH Key
|
|
logger.InfoContext(ctx, "# Step 1: Generating SSH Key")
|
|
privateKey, publicKey, err := sshutil.GenerateKeyPair()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to generate temporary ssh key pair: %w", err)
|
|
}
|
|
|
|
key, _, err := s.c.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{
|
|
Name: resourceName,
|
|
PublicKey: string(publicKey),
|
|
Labels: labels,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to submit temporary ssh key to API: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "Uploaded ssh key", "ssh-key-id", key.ID)
|
|
defer func() {
|
|
// Cleanup SSH Key
|
|
if options.DebugSkipResourceCleanup {
|
|
logger.InfoContext(ctx, "Cleanup: Skipping cleanup of temporary ssh key")
|
|
return
|
|
}
|
|
|
|
logger.InfoContext(ctx, "Cleanup: Deleting temporary ssh key")
|
|
|
|
_, err := s.c.SSHKey.Delete(ctx, key)
|
|
if err != nil {
|
|
logger.WarnContext(ctx, "Cleanup: ssh key could not be deleted", "error", err)
|
|
// TODO
|
|
}
|
|
}()
|
|
|
|
// 2. Create Server
|
|
logger.InfoContext(ctx, "# Step 2: Creating Server")
|
|
var serverType *hcloud.ServerType
|
|
if options.ServerType != nil {
|
|
serverType = options.ServerType
|
|
} else {
|
|
var ok bool
|
|
serverType, ok = serverTypePerArchitecture[options.Architecture]
|
|
if !ok {
|
|
return nil, fmt.Errorf("unknown architecture %q, valid options: %q, %q", options.Architecture, hcloud.ArchitectureX86, hcloud.ArchitectureARM)
|
|
}
|
|
}
|
|
|
|
logger.DebugContext(ctx, "creating server with config",
|
|
"image", defaultImage.Name,
|
|
"location", defaultLocation.Name,
|
|
"serverType", serverType.Name,
|
|
)
|
|
serverCreateResult, _, err := s.c.Server.Create(ctx, hcloud.ServerCreateOpts{
|
|
Name: resourceName,
|
|
ServerType: serverType,
|
|
|
|
// Not used, but without this the user receives an email with a password for every created server
|
|
SSHKeys: []*hcloud.SSHKey{key},
|
|
|
|
// We need to enable rescue system first
|
|
StartAfterCreate: hcloud.Ptr(false),
|
|
// Image will never be booted, we only boot into rescue system
|
|
Image: defaultImage,
|
|
Location: defaultLocation,
|
|
Labels: labels,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating the temporary server failed: %w", err)
|
|
}
|
|
logger = logger.With("server", serverCreateResult.Server.ID)
|
|
logger.DebugContext(ctx, "Created Server")
|
|
|
|
logger.DebugContext(ctx, "waiting on actions")
|
|
err = s.c.Action.WaitFor(ctx, append(serverCreateResult.NextActions, serverCreateResult.Action)...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating the temporary server failed: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "actions finished")
|
|
|
|
server := serverCreateResult.Server
|
|
defer func() {
|
|
// Cleanup Server
|
|
if options.DebugSkipResourceCleanup {
|
|
logger.InfoContext(ctx, "Cleanup: Skipping cleanup of temporary server")
|
|
return
|
|
}
|
|
|
|
logger.InfoContext(ctx, "Cleanup: Deleting temporary server")
|
|
|
|
_, _, err := s.c.Server.DeleteWithResult(ctx, server)
|
|
if err != nil {
|
|
logger.WarnContext(ctx, "Cleanup: server could not be deleted", "error", err)
|
|
}
|
|
}()
|
|
|
|
// 3. Activate Rescue System
|
|
logger.InfoContext(ctx, "# Step 3: Activating Rescue System")
|
|
enableRescueResult, _, err := s.c.Server.EnableRescue(ctx, server, hcloud.ServerEnableRescueOpts{
|
|
Type: defaultRescueType,
|
|
SSHKeys: []*hcloud.SSHKey{key},
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("enabling the rescue system on the temporary server failed: %w", err)
|
|
}
|
|
|
|
logger.DebugContext(ctx, "rescue system requested, waiting on action")
|
|
|
|
err = s.c.Action.WaitFor(ctx, enableRescueResult.Action)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("enabling the rescue system on the temporary server failed: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "action finished, rescue system enabled")
|
|
|
|
// 4. Boot Server
|
|
logger.InfoContext(ctx, "# Step 4: Booting Server")
|
|
powerOnAction, _, err := s.c.Server.Poweron(ctx, server)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("starting the temporary server failed: %w", err)
|
|
}
|
|
|
|
logger.DebugContext(ctx, "boot requested, waiting on action")
|
|
|
|
err = s.c.Action.WaitFor(ctx, powerOnAction)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("starting the temporary server failed: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "action finished, server is booting")
|
|
|
|
// 5. Open SSH Session
|
|
logger.InfoContext(ctx, "# Step 5: Opening SSH Connection")
|
|
signer, err := ssh.ParsePrivateKey(privateKey)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parsing the automatically generated temporary private key failed: %w", err)
|
|
}
|
|
|
|
sshClientConfig := &ssh.ClientConfig{
|
|
User: "root",
|
|
Auth: []ssh.AuthMethod{
|
|
ssh.PublicKeys(signer),
|
|
},
|
|
// There is no way to get the host key of the rescue system beforehand
|
|
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
|
Timeout: defaultSSHDialTimeout,
|
|
}
|
|
|
|
// the server needs some time until its properly started and ssh is available
|
|
var sshClient *ssh.Client
|
|
|
|
err = control.Retry(
|
|
contextlogger.New(ctx, logger.With("operation", "ssh")),
|
|
100, // ~ 3 minutes
|
|
func() error {
|
|
var err error
|
|
logger.DebugContext(ctx, "trying to connect to server", "ip", server.PublicNet.IPv4.IP)
|
|
sshClient, err = ssh.Dial("tcp", server.PublicNet.IPv4.IP.String()+":ssh", sshClientConfig)
|
|
return err
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to ssh into temporary server: %w", err)
|
|
}
|
|
defer func() { _ = sshClient.Close() }()
|
|
|
|
// 6. Wipe existing disk, to avoid storing any bytes from it in the snapshot
|
|
logger.InfoContext(ctx, "# Step 6: Cleaning existing disk")
|
|
|
|
output, err := sshsession.Run(sshClient, "blkdiscard /dev/sda", nil)
|
|
logger.DebugContext(ctx, string(output))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to clean existing disk: %w", err)
|
|
}
|
|
|
|
// 7. SSH On Server: Download Image, Decompress, Write to Root Disk
|
|
logger.InfoContext(ctx, "# Step 7: Downloading image and writing to disk")
|
|
|
|
cmd, err := assembleCommand(options)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
logger.DebugContext(ctx, "running download, decompress and write to disk command", "cmd", cmd)
|
|
|
|
output, err = sshsession.Run(sshClient, cmd, options.ImageReader)
|
|
logger.InfoContext(ctx, "# Step 7: Finished writing image to disk")
|
|
logger.DebugContext(ctx, string(output))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to download and write the image: %w", err)
|
|
}
|
|
|
|
// 8. SSH On Server: Shutdown
|
|
logger.InfoContext(ctx, "# Step 8: Shutting down server")
|
|
_, err = sshsession.Run(sshClient, "shutdown now", nil)
|
|
if err != nil {
|
|
// TODO Verify if shutdown error, otherwise return
|
|
logger.WarnContext(ctx, "shutdown returned error", "err", err)
|
|
}
|
|
|
|
// 9. Create Image from Server
|
|
logger.InfoContext(ctx, "# Step 9: Creating Image")
|
|
createImageResult, _, err := s.c.Server.CreateImage(ctx, server, &hcloud.ServerCreateImageOpts{
|
|
Type: hcloud.ImageTypeSnapshot,
|
|
Description: options.Description,
|
|
Labels: labels,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create snapshot: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "image creation requested, waiting on action")
|
|
|
|
err = s.c.Action.WaitFor(ctx, createImageResult.Action)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create snapshot: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "action finished, image was created")
|
|
|
|
image := createImageResult.Image
|
|
logger.InfoContext(ctx, "# Image was created", "image", image.ID)
|
|
|
|
// Resource cleanup is happening in `defer`
|
|
return image, nil
|
|
}
|
|
|
|
// CleanupTempResources tries to delete any resources that were left over from previous calls to [Client.Upload].
|
|
// Upload tries to clean up any temporary resources it created at runtime, but might fail at any point.
|
|
// You can then use this command to make sure that all temporary resources are removed from your project.
|
|
//
|
|
// This method tries to delete any server or ssh keys that match the [DefaultLabels]
|
|
func (s *Client) CleanupTempResources(ctx context.Context) error {
|
|
logger := contextlogger.From(ctx).With(
|
|
"library", "hcloudimages",
|
|
"method", "cleanup",
|
|
)
|
|
|
|
selector := labelutil.Selector(DefaultLabels)
|
|
logger = logger.With("selector", selector)
|
|
|
|
logger.InfoContext(ctx, "# Cleaning up Servers")
|
|
err := s.cleanupTempServers(ctx, logger, selector)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to clean up all servers: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "cleaned up all servers")
|
|
|
|
logger.InfoContext(ctx, "# Cleaning up SSH Keys")
|
|
err = s.cleanupTempSSHKeys(ctx, logger, selector)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to clean up all ssh keys: %w", err)
|
|
}
|
|
logger.DebugContext(ctx, "cleaned up all ssh keys")
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Client) cleanupTempServers(ctx context.Context, logger *slog.Logger, selector string) error {
|
|
servers, err := s.c.Server.AllWithOpts(ctx, hcloud.ServerListOpts{ListOpts: hcloud.ListOpts{
|
|
LabelSelector: selector,
|
|
}})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to list servers: %w", err)
|
|
}
|
|
|
|
if len(servers) == 0 {
|
|
logger.InfoContext(ctx, "No servers found")
|
|
return nil
|
|
}
|
|
logger.InfoContext(ctx, "removing servers", "count", len(servers))
|
|
|
|
errs := []error{}
|
|
actions := make([]*hcloud.Action, 0, len(servers))
|
|
|
|
for _, server := range servers {
|
|
result, _, err := s.c.Server.DeleteWithResult(ctx, server)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
logger.WarnContext(ctx, "failed to delete server", "server", server.ID, "error", err)
|
|
continue
|
|
}
|
|
|
|
actions = append(actions, result.Action)
|
|
}
|
|
|
|
successActions, errorActions, err := actionutil.Settle(ctx, &s.c.Action, actions...)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to wait for server delete: %w", err)
|
|
}
|
|
|
|
if len(successActions) > 0 {
|
|
ids := make([]int64, 0, len(successActions))
|
|
for _, action := range successActions {
|
|
for _, resource := range action.Resources {
|
|
if resource.Type == hcloud.ActionResourceTypeServer {
|
|
ids = append(ids, resource.ID)
|
|
}
|
|
}
|
|
}
|
|
|
|
logger.InfoContext(ctx, "successfully deleted servers", "servers", ids)
|
|
}
|
|
|
|
if len(errorActions) > 0 {
|
|
for _, action := range errorActions {
|
|
errs = append(errs, action.Error())
|
|
}
|
|
}
|
|
|
|
if len(errs) > 0 {
|
|
// The returned message contains no info about the server IDs which failed
|
|
return fmt.Errorf("failed to delete some of the servers: %w", errors.Join(errs...))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Client) cleanupTempSSHKeys(ctx context.Context, logger *slog.Logger, selector string) error {
|
|
keys, _, err := s.c.SSHKey.List(ctx, hcloud.SSHKeyListOpts{ListOpts: hcloud.ListOpts{
|
|
LabelSelector: selector,
|
|
}})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to list keys: %w", err)
|
|
}
|
|
|
|
if len(keys) == 0 {
|
|
logger.InfoContext(ctx, "No ssh keys found")
|
|
return nil
|
|
}
|
|
|
|
errs := []error{}
|
|
for _, key := range keys {
|
|
_, err := s.c.SSHKey.Delete(ctx, key)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
logger.WarnContext(ctx, "failed to delete ssh key", "ssh-key", key.ID, "error", err)
|
|
continue
|
|
}
|
|
}
|
|
|
|
if len(errs) > 0 {
|
|
// The returned message contains no info about the server IDs which failed
|
|
return fmt.Errorf("failed to delete some of the ssh keys: %w", errors.Join(errs...))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func assembleCommand(options UploadOptions) (string, error) {
|
|
// Make sure that we fail early, ie. if the image url does not work
|
|
cmd := "set -euo pipefail && "
|
|
|
|
if options.ImageURL != nil {
|
|
cmd += fmt.Sprintf("wget --no-verbose -O - %q | ", options.ImageURL.String())
|
|
}
|
|
|
|
if options.ImageCompression != CompressionNone {
|
|
switch options.ImageCompression {
|
|
case CompressionBZ2:
|
|
cmd += "bzip2 -cd | "
|
|
case CompressionXZ:
|
|
cmd += "xz -cd | "
|
|
default:
|
|
return "", fmt.Errorf("unknown compression: %q", options.ImageCompression)
|
|
}
|
|
}
|
|
|
|
switch options.ImageFormat {
|
|
case FormatRaw:
|
|
cmd += "dd of=/dev/sda bs=4M"
|
|
case FormatQCOW2:
|
|
cmd += "tee image.qcow2 > /dev/null && qemu-img dd -f qcow2 -O raw if=image.qcow2 of=/dev/sda bs=4M"
|
|
default:
|
|
return "", fmt.Errorf("unknown format: %q", options.ImageFormat)
|
|
}
|
|
|
|
cmd += " && sync"
|
|
|
|
// the pipefail does not work correctly without wrapping in bash.
|
|
cmd = fmt.Sprintf("bash -c '%s'", cmd)
|
|
|
|
return cmd, nil
|
|
}
|