chore: support debug shell for advanced development

Support dropping into a very minimal debug shell.

```bash
sudo -E --preserve-env=HOME _out/talosctl-linux-amd64 cluster create --provisioner=qemu $REGISTRY_MIRROR_FLAGS --controlplanes=1 --workers=0 --with-bootloader=false --with-debug-shell
```

Co-authored-by: Dmitry Sharshakov <dmitry.sharshakov@siderolabs.com>
Signed-off-by: Noel Georgi <git@frezbo.dev>
Signed-off-by: Dmitry Sharshakov <dmitry.sharshakov@siderolabs.com>
This commit is contained in:
Noel Georgi 2024-08-19 23:39:57 +05:30 committed by Dmitry Sharshakov
parent c14b446229
commit 1b22df48a4
No known key found for this signature in database
GPG Key ID: 9866BBFAF691F3AF
8 changed files with 108 additions and 0 deletions

View File

@ -6,6 +6,7 @@ ARG TOOLS
ARG PKGS
ARG EXTRAS
ARG INSTALLER_ARCH
ARG DEBUG_TOOLS_SOURCE
ARG PKGS_PREFIX
ARG PKG_FHS
@ -42,6 +43,8 @@ ARG PKG_CNI
ARG PKG_FLANNEL_CNI
ARG PKG_TALOSCTL_CNI_BUNDLE_INSTALL
ARG DEBUG_TOOLS_SOURCE
# Resolve package images using ${PKGS} to be used later in COPY --from=.
FROM ${PKG_FHS} AS pkg-fhs
@ -140,6 +143,29 @@ FROM ${PKG_KERNEL} AS pkg-kernel
FROM --platform=amd64 ${PKG_KERNEL} AS pkg-kernel-amd64
FROM --platform=arm64 ${PKG_KERNEL} AS pkg-kernel-arm64
FROM --platform=amd64 ${TOOLS} as tools-amd64
FROM --platform=arm64 ${TOOLS} as tools-arm64
FROM scratch as pkg-debug-tools-scratch-amd64
FROM scratch as pkg-debug-tools-scratch-arm64
# Minimal amd64 debug tool set: bash, cat, ls and tee plus the musl dynamic loader, all copied from the tools image.
# NOTE(review): the loader destination is nested one level deeper (/toolchain/toolchain/lib) than the binaries.
# Presumably this is so that the later `COPY --from=pkg-debug-tools-amd64 * /rootfs/` (which copies the contents
# of matched directories) leaves the loader at /rootfs/toolchain/lib while the binaries land in /rootfs/bin —
# confirm before "fixing" the path.
FROM scratch as pkg-debug-tools-bash-minimal-amd64
COPY --from=tools-amd64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-amd64 /toolchain/lib/ld-musl-x86_64.so.1 /toolchain/toolchain/lib/ld-musl-x86_64.so.1
COPY --from=tools-amd64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-amd64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-amd64 /toolchain/bin/tee /toolchain/bin/tee
# Minimal arm64 debug tool set: bash, cat, ls and tee plus the musl dynamic loader, all copied from the tools image.
# NOTE(review): the loader destination is nested one level deeper (/toolchain/toolchain/lib) than the binaries.
# Presumably this is so that the later `COPY --from=pkg-debug-tools-arm64 * /rootfs/` (which copies the contents
# of matched directories) leaves the loader at /rootfs/toolchain/lib while the binaries land in /rootfs/bin —
# confirm before "fixing" the path.
FROM scratch as pkg-debug-tools-bash-minimal-arm64
COPY --from=tools-arm64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-arm64 /toolchain/lib/ld-musl-aarch64.so.1 /toolchain/toolchain/lib/ld-musl-aarch64.so.1
COPY --from=tools-arm64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-arm64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-arm64 /toolchain/bin/tee /toolchain/bin/tee
FROM pkg-debug-tools-${DEBUG_TOOLS_SOURCE}-amd64 as pkg-debug-tools-amd64
FROM pkg-debug-tools-${DEBUG_TOOLS_SOURCE}-arm64 as pkg-debug-tools-arm64
# Strip CNI package.
FROM scratch AS pkg-cni-stripped-amd64
@ -651,6 +677,10 @@ COPY --link --from=pkg-kmod-amd64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-amd64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-amd64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-amd64 /machined /rootfs/sbin/init
# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-amd64 * /rootfs/
RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff
@ -721,6 +751,10 @@ COPY --link --from=pkg-kmod-arm64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-arm64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-arm64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-arm64 /machined /rootfs/sbin/init
# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-arm64 * /rootfs/
RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff

View File

@ -19,6 +19,8 @@ CI_RELEASE_TAG := $(shell git log --oneline --format=%B -n 1 HEAD^2 -- 2>/dev/nu
ARTIFACTS := _out
TOOLS ?= ghcr.io/siderolabs/tools:v1.9.0-alpha.0-4-g2058296
DEBUG_TOOLS_SOURCE := scratch
PKGS_PREFIX ?= ghcr.io/siderolabs
PKGS ?= v1.9.0-alpha.0-24-gbe92da0
EXTRAS ?= v1.9.0-alpha.0-1-geab6e58
@ -147,6 +149,11 @@ else
GO_LDFLAGS += -s -w
endif
ifneq (, $(filter $(WITH_DEBUG_SHELL), t true TRUE y yes 1))
# bash-minimal is a Dockerfile target that copies over the bash from siderolabs tools
DEBUG_TOOLS_SOURCE := bash-minimal
endif
GO_BUILDFLAGS_TALOSCTL := $(GO_BUILDFLAGS) -tags "$(GO_BUILDTAGS_TALOSCTL)"
GO_BUILDFLAGS += -tags "$(GO_BUILDTAGS)"
@ -161,6 +168,7 @@ COMMON_ARGS += --progress=$(PROGRESS)
COMMON_ARGS += --platform=$(PLATFORM)
COMMON_ARGS += --push=$(PUSH)
COMMON_ARGS += --build-arg=TOOLS=$(TOOLS)
COMMON_ARGS += --build-arg=DEBUG_TOOLS_SOURCE=$(DEBUG_TOOLS_SOURCE)
COMMON_ARGS += --build-arg=PKGS=$(PKGS)
COMMON_ARGS += --build-arg=EXTRAS=$(EXTRAS)
COMMON_ARGS += --build-arg=GOFUMPT_VERSION=$(GOFUMPT_VERSION)

View File

@ -87,6 +87,7 @@ const (
controlPlanePortFlag = "control-plane-port"
firewallFlag = "with-firewall"
tpm2EnabledFlag = "with-tpm2"
withDebugShellFlag = "with-debug-shell"
// The following flags are the gen options - the options that are only used in machine configuration (i.e., not during the qemu/docker provisioning).
// They are not applicable when no machine configuration is generated, hence mutually exclusive with the --input-dir flag.
@ -190,6 +191,7 @@ var (
withUUIDHostnames bool
withSiderolinkAgent agentFlag
withJSONLogs bool
debugShellEnabled bool
)
// createCmd represents the cluster up command.
@ -470,6 +472,7 @@ func create(ctx context.Context) error {
provision.WithBootlader(bootloaderEnabled),
provision.WithUEFI(uefiEnabled),
provision.WithTPM2(tpm2Enabled),
provision.WithDebugShell(debugShellEnabled),
provision.WithExtraUEFISearchPaths(extraUEFISearchPaths),
provision.WithTargetArch(targetArch),
provision.WithSiderolinkAgent(withSiderolinkAgent.IsEnabled()),
@ -477,6 +480,12 @@ func create(ctx context.Context) error {
var configBundleOpts []bundle.Option
// Reject the debug shell for any provisioner other than qemu.
if debugShellEnabled && provisionerName != "qemu" {
	return errors.New("debug shell only supported with qemu provisioner")
}
if ports != "" {
if provisionerName != docker {
return errors.New("exposed-ports flag only supported with docker provisioner")
@ -968,6 +977,21 @@ func create(ctx context.Context) error {
return err
}
// With the debug shell enabled, print how to attach to each node's serial
// socket and skip the remaining steps of cluster creation.
if debugShellEnabled {
	// The Talos directory does not depend on the node, so resolve it once and
	// surface a failure instead of silently returning success.
	talosDir, err := clientconfig.GetTalosDirectory()
	if err != nil {
		return fmt.Errorf("resolving talos directory for debug shell sockets: %w", err)
	}

	fmt.Println("You can now connect to debug shell on any node using these commands:")

	for _, node := range request.Nodes {
		fmt.Printf("socat - UNIX-CONNECT:%s\n", filepath.Join(talosDir, "clusters", clusterName, node.Name+".serial"))
	}

	return nil
}
// No talosconfig in the bundle - skip the operations below
if bundleTalosconfig == nil {
return nil
@ -1206,6 +1230,8 @@ func init() {
createCmd.Flags().BoolVar(&bootloaderEnabled, bootloaderEnabledFlag, true, "enable bootloader to load kernel and initramfs from disk image after install")
createCmd.Flags().BoolVar(&uefiEnabled, "with-uefi", true, "enable UEFI on x86_64 architecture")
createCmd.Flags().BoolVar(&tpm2Enabled, tpm2EnabledFlag, false, "enable TPM2 emulation support using swtpm")
createCmd.Flags().BoolVar(&debugShellEnabled, withDebugShellFlag, false, "drop talos into a maintenance shell on boot, this is for advanced debugging for developers only")
// Developer-only flag: hide it from the help output. The shared constant keeps
// the registered name and the hidden name from drifting apart.
createCmd.Flags().MarkHidden(withDebugShellFlag) //nolint:errcheck
createCmd.Flags().StringSliceVar(&extraUEFISearchPaths, "extra-uefi-search-paths", []string{}, "additional search paths for UEFI firmware (only applies when UEFI is enabled)")
createCmd.Flags().StringSliceVar(&registryMirrors, registryMirrorFlag, []string{}, "list of registry mirrors to use in format: <registry host>=<mirror URL>")
createCmd.Flags().StringSliceVar(&registryInsecure, registryInsecureFlag, []string{}, "list of registry hostnames to skip TLS verification for")

View File

@ -11,6 +11,7 @@ import (
"path/filepath"
"github.com/siderolabs/go-debug"
"github.com/siderolabs/go-procfs/procfs"
"golang.org/x/sys/unix"
"github.com/siderolabs/talos/internal/pkg/mount"
@ -28,6 +29,8 @@ var preservedPaths = map[string]struct{}{
// Switch moves the rootfs to a specified directory. See
// https://github.com/karelzak/util-linux/blob/master/sys-utils/switch_root.c.
//
//nolint:gocyclo
func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Println("moving mounts to the new rootfs")
@ -88,6 +91,14 @@ func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Printf("race detection enabled with halt_on_error=1")
}
// If the talos.debugshell kernel argument is present, exec /bin/bash in place
// of /sbin/init. Only the presence of the argument is checked, not its value.
// unix.Exec does not return on success, so the trailing return is only a
// formality for the compiler.
if val := procfs.ProcCmdline().Get("talos.debugshell"); val != nil {
if err = unix.Exec("/bin/bash", []string{"/bin/bash"}, envv); err != nil {
return fmt.Errorf("error executing /bin/bash: %w", err)
}
return nil
}
if err = unix.Exec("/sbin/init", []string{"/sbin/init"}, envv); err != nil {
return fmt.Errorf("error executing /sbin/init: %w", err)
}

View File

@ -79,6 +79,15 @@ func WithTPM2(enabled bool) Option {
}
}
// WithDebugShell returns an Option that records whether the node should drop
// into the debug shell in initramfs.
func WithDebugShell(enabled bool) Option {
	setDebugShell := func(opts *Options) error {
		opts.WithDebugShell = enabled

		return nil
	}

	return setDebugShell
}
// WithExtraUEFISearchPaths configures additional search paths to look for UEFI firmware.
func WithExtraUEFISearchPaths(extraUEFISearchPaths []string) Option {
return func(o *Options) error {
@ -166,6 +175,8 @@ type Options struct {
UEFIEnabled bool
// Enable TPM2 emulation using swtpm.
TPM2Enabled bool
// Enable debug shell in initramfs.
WithDebugShell bool
// Configure additional search paths to look for UEFI firmware.
ExtraUEFISearchPaths []string

View File

@ -56,6 +56,7 @@ type LaunchConfig struct {
NodeUUID uuid.UUID
BadRTC bool
ArchitectureData Arch
WithDebugShell bool
// Talos config
Config string
@ -320,6 +321,14 @@ func launchVM(config *LaunchConfig) error {
"pause",
}
// For the debug shell, expose a serial port as a unix socket at
// <StatePath>/<Hostname>.serial. QEMU listens on the socket (server) and does
// not block VM startup waiting for a client to connect (nowait).
if config.WithDebugShell {
args = append(
args,
"-serial",
fmt.Sprintf("unix:%s/%s.serial,server,nowait", config.StatePath, config.Hostname),
)
}
var (
scsiAttached, ahciAttached, nvmeAttached bool
ahciBus int

View File

@ -89,6 +89,10 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
}
}
// Request the debug shell through the talos.debugshell kernel argument; it is
// appended with an empty value.
if opts.WithDebugShell {
cmdline.Append("talos.debugshell", "")
}
var nodeConfig string
if !nodeReq.SkipInjectingConfig {
@ -157,6 +161,7 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
TFTPServer: nodeReq.TFTPServer,
IPXEBootFileName: nodeReq.IPXEBootFilename,
APIPort: apiPort,
WithDebugShell: opts.WithDebugShell,
}
if clusterReq.IPXEBootScript != "" {

View File

@ -177,6 +177,10 @@ Specific tests can be run with `-test.run=TestIntegration/api.ResetSuite`.
`make <something> WITH_DEBUG=1` enables Go profiling and other debug features, useful for local development.
`make initramfs WITH_DEBUG_SHELL=true` adds bash and a minimal set of utilities for debugging purposes.
Combine it with the `--with-debug-shell` flag when creating a cluster to obtain shell access.
This is rarely needed, because in this mode the bash shell runs in place of machined.
## Destroying Cluster
```bash