mirror of
https://github.com/siderolabs/talos.git
synced 2026-05-05 20:36:18 +02:00
docs: fork docs for v1.12
Generated docs now go to the v1.12 folder.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
parent b66b995d34
commit 34f25815c0
24 Dockerfile
@@ -1318,11 +1318,8 @@ RUN --mount=type=cache,target=/.cache,id=talos/.cache prototool break check --de
 
 FROM oven/bun:1-alpine AS lint-markdown
 ARG MARKDOWNLINTCLI_VERSION
-ARG TEXTLINT_VERSION
-ARG TEXTLINT_FILTER_RULE_COMMENTS_VERSION
-ARG TEXTLINT_RULE_ONE_SENTENCE_PER_LINE_VERSION
 RUN apk add --no-cache findutils
-RUN bun i -g markdownlint-cli@${MARKDOWNLINTCLI_VERSION} textlint@${TEXTLINT_VERSION} textlint-filter-rule-comments@${TEXTLINT_FILTER_RULE_COMMENTS_VERSION} textlint-rule-one-sentence-per-line@${TEXTLINT_RULE_ONE_SENTENCE_PER_LINE_VERSION}
+RUN bun i -g markdownlint-cli@${MARKDOWNLINTCLI_VERSION}
 WORKDIR /src
 COPY . .
 RUN bun run --bun markdownlint \
@@ -1335,17 +1332,6 @@ RUN bun run --bun markdownlint \
     --ignore 'website/themes/**' \
     --disable MD045 MD056 -- \
     .
-RUN find . \
-    -name '*.md' \
-    -not -path './LICENCE.md' \
-    -not -path './CHANGELOG.md' \
-    -not -path './CODE_OF_CONDUCT.md' \
-    -not -path '*/node_modules/*' \
-    -not -path './hack/chglog/**' \
-    -not -path './website/content/*/reference/*' \
-    -not -path './website/themes/**' \
-    -print0 \
-    | xargs -0 bun run --bun textlint
 
 # The docs target generates documentation.
 
@@ -1383,10 +1369,10 @@ RUN protoc \
     /protos/time/*.proto
 
 FROM scratch AS docs
-COPY --from=docs-build /tmp/configuration/ /website/content/v1.11/reference/configuration/
-COPY --from=docs-build /tmp/cli.md /website/content/v1.11/reference/
-COPY --from=docs-build /tmp/schemas /website/content/v1.11/schemas/
-COPY --from=proto-docs-build /tmp/api.md /website/content/v1.11/reference/
+COPY --from=docs-build /tmp/configuration/ /website/content/v1.12/reference/configuration/
+COPY --from=docs-build /tmp/cli.md /website/content/v1.12/reference/
+COPY --from=docs-build /tmp/schemas /website/content/v1.12/schemas/
+COPY --from=proto-docs-build /tmp/api.md /website/content/v1.12/reference/
 
 # The talosctl-cni-bundle builds the CNI bundle for talosctl.
11 Makefile
@@ -112,12 +112,6 @@ PROTOTOOL_VERSION ?= v1.10.0
 PROTOC_GEN_DOC_VERSION ?= v1.5.1
 # renovate: datasource=npm depName=markdownlint-cli
 MARKDOWNLINTCLI_VERSION ?= 0.45.0
-# renovate: datasource=npm depName=textlint
-TEXTLINT_VERSION ?= 15.2.0
-# renovate: datasource=npm depName=textlint-filter-rule-comments
-TEXTLINT_FILTER_RULE_COMMENTS_VERSION ?= 1.2.2
-# renovate: datasource=npm depName=textlint-rule-one-sentence-per-line
-TEXTLINT_RULE_ONE_SENTENCE_PER_LINE_VERSION ?= 2.0.0
 # renovate: datasource=docker versioning=docker depName=hugomods/hugo
 HUGO_VERSION ?= dart-sass-0.145.0
 OPERATING_SYSTEM := $(shell uname -s | tr "[:upper:]" "[:lower:]")
@@ -283,9 +277,6 @@ COMMON_ARGS += --build-arg=SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH)
 COMMON_ARGS += --build-arg=STRINGER_VERSION=$(STRINGER_VERSION)
 COMMON_ARGS += --build-arg=TAG=$(TAG)
 COMMON_ARGS += --build-arg=TESTPKGS=$(TESTPKGS)
-COMMON_ARGS += --build-arg=TEXTLINT_FILTER_RULE_COMMENTS_VERSION=$(TEXTLINT_FILTER_RULE_COMMENTS_VERSION)
-COMMON_ARGS += --build-arg=TEXTLINT_RULE_ONE_SENTENCE_PER_LINE_VERSION=$(TEXTLINT_RULE_ONE_SENTENCE_PER_LINE_VERSION)
-COMMON_ARGS += --build-arg=TEXTLINT_VERSION=$(TEXTLINT_VERSION)
 COMMON_ARGS += --build-arg=TOOLS_PREFIX=$(TOOLS_PREFIX)
 COMMON_ARGS += --build-arg=TOOLS=$(TOOLS)
 COMMON_ARGS += --build-arg=GENERATE_VEX_PREFIX=$(GENERATE_VEX_PREFIX)
@@ -561,7 +552,7 @@ lint-%: ## Runs the specified linter. Valid options are go, protobuf, and markdo
 	@$(MAKE) target-lint-$* PLATFORM=linux/$(ARCH)
 
 lint: ## Runs linters on go, vulncheck, deadcode, protobuf, and markdown file types.
-	@$(MAKE) lint-go lint-vulncheck lint-deadcode lint-protobuf lint-markdown
+	@$(MAKE) lint-go lint-deadcode lint-protobuf lint-markdown
 
 check-dirty: ## Verifies that source tree is not dirty
 	@if test -n "`git status --porcelain`"; then echo "Source tree is dirty"; git status; git diff; exit 1 ; fi
@@ -19,7 +19,7 @@ description: "Table of supported Talos Linux versions and respective platforms."
 | - SBCs | Banana Pi M64, Jetson Nano, Libre Computer Board ALL-H3-CC, Nano Pi R4S, Pine64, Pine64 Rock64, Radxa ROCK Pi 4C, Radxa ROCK 4C+, Radxa ROCK 5B, Raspberry Pi 4B, Raspberry Pi Compute Module 4, Turing RK1, Orange Pi 5 | Banana Pi M64, Jetson Nano, Libre Computer Board ALL-H3-CC, Nano Pi R4S, Pine64, Pine64 Rock64, Radxa ROCK Pi 4C, Radxa ROCK 4C+, Radxa ROCK 5B, Raspberry Pi 4B, Raspberry Pi Compute Module 4, Turing RK1, Orange Pi 5 |
 | - local | Docker, QEMU | Docker, QEMU |
 | **Omni** | | |
-| [Omni](https://github.com/siderolabs/omni) | >= 0.50.0 | >= 0.49.0 |
+| [Omni](https://github.com/siderolabs/omni) | >= 1.1.0 | >= 0.49.0 |
 | **Cluster API** | | |
 | [CAPI Bootstrap Provider Talos](https://github.com/siderolabs/cluster-api-bootstrap-provider-talos) | >= 0.6.8 | >= 0.6.8 |
 | [CAPI Control Plane Provider Talos](https://github.com/siderolabs/cluster-api-control-plane-provider-talos) | >= 0.5.9 | >= 0.5.9 |
56 website/content/v1.12/_index.md Normal file
@@ -0,0 +1,56 @@
---
title: Welcome
no_list: true
linkTitle: "Documentation"
images: ["images/talos-dev-banner.png"]
cascade:
  type: docs
lastRelease: v1.12.0-alpha0
kubernetesRelease: "1.35.0-alpha.0"
prevKubernetesRelease: "1.34.0"
nvidiaContainerToolkitRelease: "v1.17.8"
nvidiaDriverRelease: "535.247.01"
preRelease: true
---

## Welcome

Welcome to the Talos documentation.
If you are just getting familiar with Talos, we recommend starting here:

- [What is Talos]({{< relref "introduction/what-is-talos" >}}): a quick description of Talos
- [Quickstart]({{< relref "introduction/quickstart" >}}): the fastest way to get a Talos cluster up and running
- [Getting Started]({{< relref "introduction/getting-started" >}}): a long-form, guided tour of getting a full Talos cluster deployed

## Open Source

### Community

- GitHub: [repo](https://github.com/siderolabs/talos)
- Support: questions, bugs, and feature requests via [GitHub Discussions](https://github.com/siderolabs/talos/discussions)
- Community Slack: join our [Slack channel](https://slack.dev.talos-systems.io)
- Forum: [community](https://groups.google.com/a/siderolabs.com/forum/#!forum/community)
- Twitter: [@SideroLabs](https://twitter.com/talossystems)
- Email: [info@SideroLabs.com](mailto:info@SideroLabs.com)

If you're interested in this project and would like to help with engineering efforts, or have general usage questions, we are happy to have you!
We hold a weekly meeting that all audiences are welcome to attend.

We would appreciate your feedback so that we can make Talos even better!
To do so, you can take our [survey](https://docs.google.com/forms/d/1TUna5YTYGCKot68Y9YN_CLobY6z9JzLVCq1G7DoyNjA/edit).

### Office Hours

- When: Second Monday of every month at 16:30 UTC.
- Where: [Google Meet](https://meet.google.com/ivb-kjfm-jfc).

You can subscribe to this meeting by joining the community forum above.

> Note: You can convert the meeting time to your [local time](https://everytimezone.com/s/599e61d6).

## Enterprise

If you are using Talos in a production setting and need consulting services to get started or to integrate Talos into your existing environment, we can help.
Sidero Labs, Inc. offers support contracts with SLA (Service Level Agreement)-bound terms for mission-critical environments.

[Learn More](https://www.siderolabs.com/support/)
4 website/content/v1.12/advanced/_index.md Normal file
@@ -0,0 +1,4 @@
---
title: "Advanced Guides"
weight: 60
---
108 website/content/v1.12/advanced/advanced-networking.md Normal file
@@ -0,0 +1,108 @@
---
title: "Advanced Networking"
description: "How to configure advanced networking options on Talos Linux."
aliases:
  - ../guides/advanced-networking
---

## Static Addressing

Static addressing comprises specifying `addresses`, `routes` (remember to add your default gateway), and `interface`.
Most likely you'll also want to define the `nameservers` so you have properly functioning DNS.

```yaml
machine:
  network:
    hostname: talos
    nameservers:
      - 10.0.0.1
    interfaces:
      - interface: eth0
        addresses:
          - 10.0.0.201/8
        mtu: 8765
        routes:
          - network: 0.0.0.0/0
            gateway: 10.0.0.1
      - interface: eth1
        ignore: true
  time:
    servers:
      - time.cloudflare.com
```

## Additional Addresses for an Interface

In some environments you may need to set additional addresses on an interface.
In the following example, we set two additional addresses on the loopback interface.

```yaml
machine:
  network:
    interfaces:
      - interface: lo
        addresses:
          - 192.168.0.21/24
          - 10.2.2.2/24
```

## Bonding

The following example shows how to create a bonded interface.

```yaml
machine:
  network:
    interfaces:
      - interface: bond0
        dhcp: true
        bond:
          mode: 802.3ad
          lacpRate: fast
          xmitHashPolicy: layer3+4
          miimon: 100
          updelay: 200
          downdelay: 200
          interfaces:
            - eth0
            - eth1
```

## Setting Up a Bridge

The following example shows how to set up a bridge between two interfaces with an assigned static address.

```yaml
machine:
  network:
    interfaces:
      - interface: br0
        addresses:
          - 192.168.0.42/24
        bridge:
          stp:
            enabled: true
          interfaces:
            - eth0
            - eth1
```

## VLANs

To set up VLANs on a specific device, use an array of VLANs to add.
The master device may be configured without addressing by setting `dhcp` to `false`.

```yaml
machine:
  network:
    interfaces:
      - interface: eth0
        dhcp: false
        vlans:
          - vlanId: 100
            addresses:
              - "192.168.2.10/28"
            routes:
              - network: 0.0.0.0/0
                gateway: 192.168.2.1
```
164 website/content/v1.12/advanced/air-gapped.md Normal file
@@ -0,0 +1,164 @@
---
title: "Air-gapped Environments"
description: "Setting up Talos Linux to work in environments with no internet access."
aliases:
  - ../guides/air-gapped
---

In this guide we will create a Talos cluster running in an air-gapped environment, with all the required images being pulled from an internal registry.
We will use the [QEMU]({{< relref "../talos-guides/install/local-platforms/qemu" >}}) provisioner available in `talosctl` to create a local cluster, but the same approach could be used to deploy Talos in bigger air-gapped networks.

## Requirements

The following are requirements for this guide:

- Docker 18.03 or greater
- Requirements for the Talos [QEMU]({{< relref "../talos-guides/install/local-platforms/qemu" >}}) cluster

## Identifying Images

In air-gapped environments, access to the public Internet is restricted, so Talos can't pull images from public Docker registries (`docker.io`, `ghcr.io`, etc.).
We need to identify the images required to install and run Talos.
The same strategy can be used for images required by custom workloads running on the cluster.

The `talosctl image default` command provides a list of default images used by the Talos cluster (with default configuration settings).
To print the list of images, run:

```bash
talosctl image default
```

This list contains images required by a default deployment of Talos.
There might be additional images required for the workloads running on this cluster, and those should be added to this list.
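
For example, one way to build a combined list is to append your workload images to the default list (the `nginx` image here is just a hypothetical workload):

```bash
# sketch: collect the default Talos images plus your own workload images into one list
{ talosctl image default; echo "docker.io/library/nginx:1.27"; } > images.txt
```

The loops in the rest of this guide can then read from `images.txt` instead of calling `talosctl image default` directly.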

## Preparing the Internal Registry

As access to the public registries is restricted, we have to run an internal Docker registry.
In this guide, we will launch the registry on the same machine using Docker:

```bash
$ docker run -d -p 6000:5000 --restart always --name registry-airgapped registry:2
1bf09802bee1476bc463d972c686f90a64640d87dacce1ac8485585de69c91a5
```

This registry will be accepting connections on port 6000 on the host IPs.
The registry is empty by default, so we have to fill it with the images required by Talos.

First, we pull all the images to our local Docker daemon:

```bash
$ for image in `talosctl image default`; do docker pull $image; done
v0.15.1: Pulling from coreos/flannel
Digest: sha256:9a296fbb67790659adc3701e287adde3c59803b7fcefe354f1fc482840cdb3d9
...
```

All images are now stored in the Docker daemon store:

```bash
$ docker images
REPOSITORY                       TAG                IMAGE ID       CREATED      SIZE
gcr.io/etcd-development/etcd     v3.5.3             604d4f022632   6 days ago   181MB
ghcr.io/siderolabs/install-cni   v1.0.0-2-gc5d3ab0  4729e54f794d   6 days ago   76MB
...
```

Now we need to re-tag them so that we can push them to our local registry.
We are going to replace the first component of the image name (before the first slash) with our registry endpoint `127.0.0.1:6000`:

```bash
$ for image in `talosctl image default`; do \
    docker tag $image `echo $image | sed -E 's#^[^/]+/#127.0.0.1:6000/#'`; \
  done
```

As the next step, we push the images to the internal registry:

```bash
$ for image in `talosctl image default`; do \
    docker push `echo $image | sed -E 's#^[^/]+/#127.0.0.1:6000/#'`; \
  done
```

We can now verify that the images are pushed to the registry:

```bash
$ curl http://127.0.0.1:6000/v2/_catalog
{"repositories":["coredns/coredns","coreos/flannel","etcd-development/etcd","kube-apiserver","kube-controller-manager","kube-proxy","kube-scheduler","pause","siderolabs/install-cni","siderolabs/installer","siderolabs/kubelet"]}
```

> Note: images in the registry don't have the registry endpoint prefix anymore.
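
Individual repositories can be inspected the same way; listing tags is part of the standard registry HTTP API (the repository name below is one of those shown in the catalog above):

```bash
# list the available tags for one of the pushed repositories
curl http://127.0.0.1:6000/v2/siderolabs/installer/tags/list
```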

## Launching Talos in an Air-gapped Environment

For Talos to use the internal registry, we use the registry mirror feature to redirect all image pull requests to the internal registry.
This means that the registry endpoint (as the first component of the image reference) gets ignored, and all pull requests are sent directly to the specified endpoint.

We are going to use a QEMU-based Talos cluster for this guide, but the same approach works with Docker-based clusters as well.
As QEMU-based clusters go through the full Talos install process, they better model a real air-gapped environment.

Identify all registry prefixes from `talosctl image default`, for example:

- `docker.io`
- `gcr.io`
- `ghcr.io`
- `registry.k8s.io`

The `talosctl cluster create` command provides conveniences for common configuration options.
The only required flag for this guide is `--registry-mirror <endpoint>=http://10.5.0.1:6000`, which redirects every pull request to the internal registry; this flag needs to be repeated for each of the registry prefixes identified above.
The endpoint being used is `10.5.0.1`, as this is the default bridge interface address, which will be routable from the QEMU VMs (the `127.0.0.1` IP would point to the VM itself).

```bash
$ sudo --preserve-env=HOME talosctl cluster create --provisioner=qemu --install-image=ghcr.io/siderolabs/installer:{{< release >}} \
  --registry-mirror docker.io=http://10.5.0.1:6000 \
  --registry-mirror gcr.io=http://10.5.0.1:6000 \
  --registry-mirror ghcr.io=http://10.5.0.1:6000 \
  --registry-mirror registry.k8s.io=http://10.5.0.1:6000
validating CIDR and reserving IPs
generating PKI and tokens
creating state directory in "/home/user/.talos/clusters/talos-default"
creating network talos-default
creating load balancer
creating dhcpd
creating master nodes
creating worker nodes
waiting for API
...
```

> Note: `--install-image` should match the image which was copied into the internal registry in the previous step.

You can verify that the cluster is air-gapped by inspecting the registry logs: `docker logs -f registry-airgapped`.

## Closing Notes

Running in an air-gapped environment might require additional configuration changes, for example using custom settings for DNS and NTP servers.
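
As a minimal sketch (assuming an internal host at `10.5.0.1` also serves DNS and NTP for the network), such settings would go into the machine configuration:

```yaml
machine:
  network:
    nameservers:
      - 10.5.0.1 # internal DNS server (assumption for this sketch)
  time:
    servers:
      - 10.5.0.1 # internal NTP server (assumption for this sketch)
```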

When scaling this guide to a bare-metal environment, the following Talos config snippet could be used as an equivalent of the `--registry-mirror` flag above:

```yaml
machine:
  ...
  registries:
    mirrors:
      docker.io:
        endpoints:
          - http://10.5.0.1:6000/
      gcr.io:
        endpoints:
          - http://10.5.0.1:6000/
      ghcr.io:
        endpoints:
          - http://10.5.0.1:6000/
      registry.k8s.io:
        endpoints:
          - http://10.5.0.1:6000/
  ...
```

Other implementations of the Docker registry can be used in place of the Docker `registry` image used above to run the registry.
If required, auth can be configured for the internal registry (and custom TLS certificates if needed).

Please see the [pull-through cache guide]({{< relref "../talos-guides/configuration/pull-through-cache" >}}) for an example using the Harbor container registry with Talos.
120 website/content/v1.12/advanced/building-images.md Normal file
@@ -0,0 +1,120 @@
---
title: "Building Custom Talos Images"
description: "How to build a custom Talos image from source."
---

There might be several reasons to build Talos images from source:

* verifying the [image integrity]({{< relref "verifying-images" >}})
* building an image with custom configuration

## Checkout Talos Source

```bash
git clone https://github.com/siderolabs/talos.git
```

If building for a specific release, check out the corresponding tag:

```bash
git checkout {{< release >}}
```

## Set up the Build Environment

See [Developing Talos]({{< relref "developing-talos" >}}) for details on setting up the buildkit builder.

## Architectures

By default, Talos builds for `linux/amd64`, but you can customize that by passing the `PLATFORM` variable to `make`:

```bash
make <target> PLATFORM=linux/arm64 # build for arm64 only
make <target> PLATFORM=linux/arm64,linux/amd64 # build for arm64 and amd64, container images will be multi-arch
```

## Custom `PKGS`

When [customizing the Linux kernel]({{< relref "customizing-the-kernel" >}}), the source for the [`siderolabs/pkgs`](https://github.com/siderolabs/pkgs) repository can be overridden with the variables below (a combined example follows the list):

* if you built and pushed only a custom `kernel` package, the reference can be overridden with the `PKG_KERNEL` variable: `make <target> PKG_KERNEL=<registry>/<username>/kernel:<tag>`
* if any other single package was customized, the reference can be overridden with the `PKG_<pkg>` (e.g. `PKG_IPTABLES`) variable: `make <target> PKG_<pkg>=<registry>/<username>/<pkg>:<tag>`
* if the full `pkgs` repository was built and pushed, the references can be overridden with the `PKGS_PREFIX` and `PKGS` variables: `make <target> PKGS_PREFIX=<registry>/<username> PKGS=<tag>`
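
For example, a concrete invocation with the kernel override might look like this (the registry, username, and tag are hypothetical placeholders):

```bash
# sketch: build boot assets against a custom kernel package pushed to your own registry
make kernel initramfs PKG_KERNEL=ghcr.io/example/kernel:v1.12.0-custom
```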

## Customizations

Some of the build parameters can be customized by passing environment variables to `make`, e.g. `GOAMD64=v1` can be used to build Talos images compatible with old AMD64 CPUs:

```bash
make <target> GOAMD64=v1
```

## Building Kernel and Initramfs

The most basic boot assets can be built with:

```bash
make kernel initramfs
```

The build result will be stored as `_out/vmlinuz-<arch>` and `_out/initramfs-<arch>.xz`.

## Building Container Images

Talos container images should be pushed to the registry as the result of the build process.

The default settings are:

* `IMAGE_REGISTRY` is set to `ghcr.io`
* `USERNAME` is set to `siderolabs` (or the value of the `USERNAME` environment variable, if set)

The image can be pushed to any registry you have access to, but the access credentials should be stored in the `~/.docker/config.json` file (e.g. with `docker login`).
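
For instance, a one-time login for Docker Hub (using your own account name in place of the placeholder) stores the required credentials:

```bash
# log in once so the build can push images; credentials land in ~/.docker/config.json
docker login -u <username> docker.io
```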

Building and pushing the image can be done with:

```bash
make installer-base PUSH=true IMAGE_REGISTRY=docker.io USERNAME=<username> # ghcr.io/siderolabs/installer-base
make imager PUSH=true IMAGE_REGISTRY=docker.io USERNAME=<username> # ghcr.io/siderolabs/imager
make installer IMAGE_REGISTRY=docker.io USERNAME=<username> # ghcr.io/siderolabs/installer
```

The [local registry]({{< relref "developing-talos" >}}) running on `127.0.0.1:5005` can be used as well to avoid pushing/pulling over the network:

```bash
make installer REGISTRY=127.0.0.1:5005
```

When building the `imager` container, by default Talos will include the boot assets for both the `amd64` and `arm64` architectures; if building only for a single architecture, specify the `INSTALLER_ARCH` variable:

```bash
make imager INSTALLER_ARCH=targetarch PLATFORM=linux/amd64
```

## Building ISO

The [ISO image]({{< relref "../talos-guides/install/boot-assets" >}}) is built with the help of the `imager` container image; by default, `ghcr.io/siderolabs/imager` will be used with the matching tag:

```bash
make iso
```

The ISO image will be stored as `_out/talos-<arch>.iso`.

If the ISO image should be built with a custom `imager` image, it can be specified with the `IMAGE_REGISTRY`/`USERNAME` variables:

```bash
make iso IMAGE_REGISTRY=docker.io USERNAME=<username>
```

## Building Disk Images

The disk image is built with the help of the `imager` container image; by default, `ghcr.io/siderolabs/imager` will be used with the matching tag:

```bash
make image-metal
```

Available disk images are encoded in the `image-%` target, e.g. `make image-aws`.
As with the ISO image, a custom `imager` image can be specified with the `IMAGE_REGISTRY`/`USERNAME` variables.
187 website/content/v1.12/advanced/ca-rotation.md Normal file
@@ -0,0 +1,187 @@
---
title: "CA Rotation"
description: "How to rotate Talos and Kubernetes API root certificate authorities."
---

In general, you almost never need to rotate the root CA certificate and key for the Talos API and Kubernetes API.
Talos sets up root certificate authorities with a lifetime of 10 years, and all Talos and Kubernetes API certificates are issued by these root CAs.
So the rotation of the root CA is only needed if:

- you suspect that the private key has been compromised;
- you want to revoke access to the cluster for a leaked `talosconfig` or `kubeconfig`;
- once in 10 years.

## Overview

There are some details which make Talos and Kubernetes API root CA rotation a bit different, but the general flow is the same:

- generate a new CA certificate and key;
- add the new CA certificate as 'accepted', so new certificates will be accepted as valid;
- swap the issuing CA to the new one, keeping the old CA as accepted;
- refresh all certificates in the cluster;
- remove the old CA from 'accepted'.

At the end of the flow, the old CA is completely removed from the cluster, so all certificates issued by it will be considered invalid.

Both rotation flows are described in detail below.

## Talos API

### Automated Talos API CA Rotation

Talos API CA rotation doesn't interrupt connections within the cluster, and it doesn't require a reboot of the nodes.

Run the following command in dry-run mode to see the steps which will be taken:

```shell
$ talosctl -n <CONTROLPLANE> rotate-ca --dry-run=true --talos=true --kubernetes=false
> Starting Talos API PKI rotation, dry-run mode true...
> Using config context: "talos-default"
> Using Talos API endpoints: ["172.20.0.2"]
> Cluster topology:
  - control plane nodes: ["172.20.0.2"]
  - worker nodes: ["172.20.0.3"]
> Current Talos CA:
...
```

No changes will be done to the cluster in dry-run mode, so you can safely run it to see the steps.

Before proceeding, make sure that you can capture the output of the `talosctl` command, as it will contain the new CA certificate and key.
Record a list of Talos API users to make sure they can all be updated with the new `talosconfig`.

Run the following command to rotate the Talos API CA:

```shell
$ talosctl -n <CONTROLPLANE> rotate-ca --dry-run=false --talos=true --kubernetes=false
> Starting Talos API PKI rotation, dry-run mode false...
> Using config context: "talos-default-268"
> Using Talos API endpoints: ["172.20.0.2"]
> Cluster topology:
  - control plane nodes: ["172.20.0.2"]
  - worker nodes: ["172.20.0.3"]
> Current Talos CA:
...
> New Talos CA:
...
> Generating new talosconfig:
context: talos-default
contexts:
    talos-default:
        ....
> Verifying connectivity with existing PKI:
  - 172.20.0.2: OK (version {{< release >}})
  - 172.20.0.3: OK (version {{< release >}})
> Adding new Talos CA as accepted...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Verifying connectivity with new client cert, but old server CA:
2024/04/17 21:26:07 retrying error: rpc error: code = Unavailable desc = connection error: desc = "error reading server preface: remote error: tls: unknown certificate authority"
  - 172.20.0.2: OK (version {{< release >}})
  - 172.20.0.3: OK (version {{< release >}})
> Making new Talos CA the issuing CA, old Talos CA the accepted CA...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Verifying connectivity with new PKI:
2024/04/17 21:26:08 retrying error: rpc error: code = Unavailable desc = connection error: desc = "transport: authentication handshake failed: tls: failed to verify certificate: x509: certificate signed by unknown authority (possibly because of \"x509: Ed25519 verification failure\" while trying to verify candidate authority certificate \"talos\")"
  - 172.20.0.2: OK (version {{< release >}})
  - 172.20.0.3: OK (version {{< release >}})
> Removing old Talos CA from the accepted CAs...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Verifying connectivity with new PKI:
  - 172.20.0.2: OK (version {{< release >}})
  - 172.20.0.3: OK (version {{< release >}})
> Writing new talosconfig to "talosconfig"
```

Once the rotation is done, stash the new Talos CA, and update `secrets.yaml` (if using that for machine configuration generation) with the new CA key and certificate.

The new client `talosconfig` is written to the current directory as `talosconfig`.
You can merge it to the default location with `talosctl config merge ./talosconfig`.

If other client access `talosconfig` files need to be generated, use `talosctl config new` with the new `talosconfig`.

> Note: if using the [Talos API access from Kubernetes]({{< relref "./talos-api-access-from-k8s" >}}) feature, pods might need to be restarted manually to pick up the new `talosconfig`.
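
A rolling restart of the affected workloads is one way to do that; this sketch assumes a hypothetical deployment that uses the Talos API service account (the namespace and deployment names are placeholders):

```bash
# hypothetical: restart a workload that talks to the Talos API so it reloads credentials
kubectl -n <namespace> rollout restart deployment <deployment-using-talos-api>
```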

### Manual Steps for Talos API CA Rotation

1. Generate a new Talos CA (e.g. use `talosctl gen secrets` and take the Talos CA from the output).
2. Patch the machine configuration on all nodes, updating `.machine.acceptedCAs` with the new CA certificate (see the sketch after this list).
3. Generate a `talosconfig` with a client certificate issued by the new CA, but still using the old CA as the server CA, and verify connectivity; Talos should accept the new client certificate.
4. Patch the machine configuration on all nodes, updating `.machine.ca` with the new CA certificate and key, and keeping the old CA certificate in `.machine.acceptedCAs` (on worker nodes `.machine.ca` doesn't have the key).
5. Generate a `talosconfig` with both the client certificate and the server CA using the new CA PKI, and verify connectivity.
6. Remove the old CA certificate from `.machine.acceptedCAs` on all nodes.
7. Verify connectivity.
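
As a minimal sketch of step 2 (assuming the new CA certificate is available base64-encoded as `<BASE64_NEW_CA_CRT>`; the exact patch shape is an assumption, not taken from this guide):

```yaml
# patch.yaml -- sketch only; <BASE64_NEW_CA_CRT> is the new CA certificate, base64-encoded
machine:
  acceptedCAs:
    - crt: <BASE64_NEW_CA_CRT>
```

applied with something like `talosctl -n <node> patch machineconfig --patch @patch.yaml`.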

## Kubernetes API

### Automated Kubernetes API CA Rotation

The automated process only rotates the Kubernetes API CA, used by the `kube-apiserver`, `kubelet`, etc.
Other Kubernetes secrets might need to be rotated manually as required.
Kubernetes pods might need to be restarted to handle changes, and communication within the cluster might be disrupted during the rotation process.

Run the following command in dry-run mode to see the steps which will be taken:

```shell
$ talosctl -n <CONTROLPLANE> rotate-ca --dry-run=true --talos=false --kubernetes=true
> Starting Kubernetes API PKI rotation, dry-run mode true...
> Cluster topology:
  - control plane nodes: ["172.20.0.2"]
  - worker nodes: ["172.20.0.3"]
> Building current Kubernetes client...
> Current Kubernetes CA:
...
```

Before proceeding, make sure that you can capture the output of the `talosctl` command, as it will contain the new CA certificate and key.
As Talos API access will not be disrupted, the changes can be reverted if needed by reverting the machine configuration.

Run the following command to rotate the Kubernetes API CA:

```shell
$ talosctl -n <CONTROLPLANE> rotate-ca --dry-run=false --talos=false --kubernetes=true
> Starting Kubernetes API PKI rotation, dry-run mode false...
> Cluster topology:
  - control plane nodes: ["172.20.0.2"]
  - worker nodes: ["172.20.0.3"]
> Building current Kubernetes client...
> Current Kubernetes CA:
...
> New Kubernetes CA:
...
> Verifying connectivity with existing PKI...
  - OK (2 nodes ready)
> Adding new Kubernetes CA as accepted...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Making new Kubernetes CA the issuing CA, old Kubernetes CA the accepted CA...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Building new Kubernetes client...
> Verifying connectivity with new PKI...
2024/04/17 21:45:52 retrying error: Get "https://172.20.0.1:6443/api/v1/nodes": EOF
  - OK (2 nodes ready)
> Removing old Kubernetes CA from the accepted CAs...
  - 172.20.0.2: OK
  - 172.20.0.3: OK
> Verifying connectivity with new PKI...
  - OK (2 nodes ready)
> Kubernetes CA rotation done, new 'kubeconfig' can be fetched with `talosctl kubeconfig`.
```

At the end of the process, the Kubernetes control plane components will be restarted to pick up the CA certificate changes.
Each node's `kubelet` will re-join the cluster with a new client certificate.

A new `kubeconfig` can be fetched from the cluster with the `talosctl kubeconfig` command.

Kubernetes pods might need to be restarted manually to pick up changes to the Kubernetes API CA.

### Manual Steps for Kubernetes API CA Rotation

The steps are similar [to the Talos API CA rotation](#manual-steps-for-talos-api-ca-rotation), but use:

- `.cluster.acceptedCAs` in place of `.machine.acceptedCAs`;
- `.cluster.ca` in place of `.machine.ca`;
- `kubeconfig` in place of `talosconfig`.
285 website/content/v1.12/advanced/cgroups-analysis.md Normal file
@@ -0,0 +1,285 @@
---
title: "Cgroups Resource Analysis"
description: "How to use `talosctl cgroups` to monitor resource usage on the node."
---

Talos provides a way to monitor resource usage of the [control groups](https://docs.kernel.org/admin-guide/cgroup-v2.html) on the machine.
This feature is useful for understanding how much of the machine's resources are consumed by the containers and processes running on it.

Talos creates several system cgroups:

* `init` (contains `machined` PID 1)
* `system` (contains system services and extension services)
* `podruntime` (contains CRI containerd, kubelet, etcd)

Kubelet creates a tree of cgroups for each pod, and for each container in the pod, starting with `kubepods` as the root group.

Talos Linux might set some default limits for the cgroups; these are not configurable at the moment.
Kubelet is configured by default to reserve some amount of RAM and CPU for system processes, to prevent the system from becoming unresponsive under extreme resource pressure.

> Note: this feature is only available in `cgroupsv2` mode, which is the Talos default.

The `talosctl cgroups` command provides a way to monitor the resource usage of the cgroups on the machine; it has a set of presets, which are described below.
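
For example, to inspect a specific node with one of the presets described below (the `-n` flag works the same as for other `talosctl` commands; the node address is a placeholder):

```bash
# show the memory preset for a single node
talosctl -n <node> cgroups --preset=memory
```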

## Presets

### `cpu`

```text
$ talosctl cgroups --preset=cpu
NAME CpuWeight CpuNice CpuMax CpuUser User/% CpuSystem System/% Throttled
. unset unset [] 7m42.43755s - 8m51.855608s - 0s
├──init 79 1 [ max 100000] 35.061148s 7.58% 41.027589s 7.71% 0s
├──kubepods 77 1 [ max 100000] 3m29.902395s 45.39% 4m41.033592s 52.84% 0s
│ ├──besteffort 1 19 [ max 100000] 1.297303s 0.62% 960.152ms 0.34% 0s
│ │ └──kube-system/kube-proxy-6r5bz 1 19 [ max 100000] 1.297441s 100.01% 960.014ms 99.99% 0s
│ │ ├──kube-proxy 1 19 [ max 100000] 1.289143s 99.36% 958.587ms 99.85% 0s
│ │ └──sandbox 1 19 [ max 100000] 9.724ms 0.75% 0s 0.00% 0s
│ └──burstable 14 9 [ max 100000] 3m28.653931s 99.41% 4m40.024231s 99.64% 0s
│ ├──kube-system/kube-apiserver-talos-default-controlplane-1 8 11 [ max 100000] 2m22.458603s 68.28% 2m22.983949s 51.06% 0s
│ │ ├──kube-apiserver 8 11 [ max 100000] 2m22.440159s 99.99% 2m22.976538s 99.99% 0s
│ │ └──sandbox 1 19 [ max 100000] 14.774ms 0.01% 11.081ms 0.01% 0s
│ ├──kube-system/kube-controller-manager-talos-default-controlplane-1 2 18 [ max 100000] 17.314271s 8.30% 3.014955s 1.08% 0s
│ │ ├──kube-controller-manager 2 18 [ max 100000] 17.303941s 99.94% 3.001934s 99.57% 0s
│ │ └──sandbox 1 19 [ max 100000] 11.675ms 0.07% 11.675ms 0.39% 0s
│ ├──kube-system/kube-flannel-jzx6m 4 14 [ max 100000] 38.986678s 18.68% 1m47.717143s 38.47% 0s
│ │ ├──kube-flannel 4 14 [ max 100000] 38.962703s 99.94% 1m47.690508s 99.98% 0s
│ │ └──sandbox 1 19 [ max 100000] 14.228ms 0.04% 7.114ms 0.01% 0s
│ └──kube-system/kube-scheduler-talos-default-controlplane-1 1 19 [ max 100000] 20.103563s 9.63% 16.099219s 5.75% 0s
│ ├──kube-scheduler 1 19 [ max 100000] 20.092317s 99.94% 16.086603s 99.92% 0s
│ └──sandbox 1 19 [ max 100000] 11.93ms 0.06% 11.93ms 0.07% 0s
├──podruntime 79 1 [ max 100000] 4m59.707084s 64.81% 5m4.010222s 57.16% 0s
│ ├──etcd 79 1 [ max 100000] 2m38.215322s 52.79% 3m7.812204s 61.78% 0s
│ ├──kubelet 39 4 [ max 100000] 1m29.026444s 29.70% 1m23.112332s 27.34% 0s
│ └──runtime 39 4 [ max 100000] 48.501668s 16.18% 37.049334s 12.19% 0s
└──system 59 2 [ max 100000] 32.395345s 7.01% 12.176964s 2.29% 0s
├──apid 20 7 [ max 100000] 1.261381s 3.89% 756.827ms 6.22% 0s
├──dashboard 8 11 [ max 100000] 22.231337s 68.63% 5.328927s 43.76% 0s
├──runtime 20 7 [ max 100000] 7.282253s 22.48% 5.924559s 48.65% 0s
├──trustd 10 10 [ max 100000] 1.254353s 3.87% 220.698ms 1.81% 0s
└──udevd 10 10 [ max 100000] 78.726ms 0.24% 233.244ms 1.92% 0s
```

In the CPU view, the following columns are displayed:

* `CpuWeight`: the CPU weight of the cgroup (relative, controls the CPU shares/bandwidth)
* `CpuNice`: the CPU nice value (direct translation of the `CpuWeight` to the `nice` value)
* `CpuMax`: the maximum CPU time allowed for the cgroup
* `CpuUser`: the total CPU time consumed by the cgroup and its children in user mode
* `User/%`: the percentage of CPU time consumed by the cgroup and its children in user mode relative to the parent cgroup
* `CpuSystem`: the total CPU time consumed by the cgroup and its children in system mode
* `System/%`: the percentage of CPU time consumed by the cgroup and its children in system mode relative to the parent cgroup
* `Throttled`: the total time the cgroup has been throttled on CPU

### `cpuset`

```bash
$ talosctl cgroups --preset=cpuset
NAME CpuSet CpuSet(Eff) Mems Mems(Eff)
. 0-1 0
├──init 0-1 0
├──kubepods 0-1 0
│ ├──besteffort 0-1 0
│ │ └──kube-system/kube-proxy-6r5bz 0-1 0
│ │ ├──kube-proxy 0-1 0
│ │ └──sandbox 0-1 0
│ └──burstable 0-1 0
│ ├──kube-system/kube-apiserver-talos-default-controlplane-1 0-1 0
│ │ ├──kube-apiserver 0-1 0
│ │ └──sandbox 0-1 0
│ ├──kube-system/kube-controller-manager-talos-default-controlplane-1 0-1 0
│ │ ├──kube-controller-manager 0-1 0
│ │ └──sandbox 0-1 0
│ ├──kube-system/kube-flannel-jzx6m 0-1 0
│ │ ├──kube-flannel 0-1 0
│ │ └──sandbox 0-1 0
│ └──kube-system/kube-scheduler-talos-default-controlplane-1 0-1 0
│ ├──kube-scheduler 0-1 0
│ └──sandbox 0-1 0
├──podruntime 0-1 0
│ ├──etcd 0-1 0
│ ├──kubelet 0-1 0
│ └──runtime 0-1 0
└──system 0-1 0
├──apid 0-1 0
├──dashboard 0-1 0
├──runtime 0-1 0
├──trustd 0-1 0
└──udevd 0-1 0
```

This preset shows information about the CPU and memory sets of the cgroups; it is mostly useful with the `kubelet` CPU manager.

* `CpuSet`: the CPU set of the cgroup
* `CpuSet(Eff)`: the effective CPU set of the cgroup
* `Mems`: the memory set of the cgroup (NUMA nodes)
* `Mems(Eff)`: the effective memory set of the cgroup

### `io`

```bash
$ talosctl cgroups --preset=io
NAME Bytes Read/Written ios Read/Write PressAvg10 PressAvg60 PressTotal
. loop0: 94 MiB/0 B vda: 700 MiB/803 MiB 0.12 0.37 2m12.512921s
├──init loop0: 231 KiB/0 B vda: 4.9 MiB/4.3 MiB loop0: 6/0 vda: 206/37 0.00 0.00 232.446ms
├──kubepods vda: 282 MiB/16 MiB vda: 3195/3172 0.00 0.00 383.858ms
│ ├──besteffort vda: 58 MiB/0 B vda: 678/0 0.00 0.00 86.833ms
│ │ └──kube-system/kube-proxy-6r5bz vda: 58 MiB/0 B vda: 678/0 0.00 0.00 86.833ms
│ │ ├──kube-proxy vda: 58 MiB/0 B vda: 670/0 0.00 0.00 86.554ms
│ │ └──sandbox vda: 692 KiB/0 B vda: 8/0 0.00 0.00 467µs
│ └──burstable vda: 224 MiB/16 MiB vda: 2517/3172 0.00 0.00 308.616ms
│ ├──kube-system/kube-apiserver-talos-default-controlplane-1 vda: 76 MiB/16 MiB vda: 870/3171 0.00 0.00 151.677ms
│ │ ├──kube-apiserver vda: 76 MiB/16 MiB vda: 870/3171 0.00 0.00 156.375ms
│ │ └──sandbox 0.00 0.00 0s
│ ├──kube-system/kube-controller-manager-talos-default-controlplane-1 vda: 62 MiB/0 B vda: 670/0 0.00 0.00 95.432ms
│ │ ├──kube-controller-manager vda: 62 MiB/0 B vda: 670/0 0.00 0.00 100.197ms
│ │ └──sandbox 0.00 0.00 0s
│ ├──kube-system/kube-flannel-jzx6m vda: 36 MiB/4.0 KiB vda: 419/1 0.00 0.00 64.203ms
│ │ ├──kube-flannel vda: 35 MiB/0 B vda: 399/0 0.00 0.00 55.26ms
│ │ └──sandbox 0.00 0.00 0s
│ └──kube-system/kube-scheduler-talos-default-controlplane-1 vda: 50 MiB/0 B vda: 558/0 0.00 0.00 64.331ms
│ ├──kube-scheduler vda: 50 MiB/0 B vda: 558/0 0.00 0.00 62.821ms
│ └──sandbox 0.00 0.00 0s
├──podruntime vda: 379 MiB/764 MiB vda: 3802/287674 0.39 0.39 2m13.409399s
│ ├──etcd vda: 308 MiB/759 MiB vda: 2598/286420 0.50 0.41 2m15.407179s
│ ├──kubelet vda: 69 MiB/62 KiB vda: 834/13 0.00 0.00 122.371ms
│ └──runtime vda: 76 KiB/3.9 MiB vda: 19/1030 0.00 0.00 164.984ms
└──system loop0: 18 MiB/0 B vda: 3.2 MiB/0 B loop0: 590/0 vda: 116/0 0.00 0.00 153.609ms
├──apid loop0: 1.9 MiB/0 B loop0: 103/0 0.00 0.00 3.345ms
├──dashboard loop0: 16 MiB/0 B loop0: 487/0 0.00 0.00 11.596ms
├──runtime 0.00 0.00 28.957ms
├──trustd 0.00 0.00 0s
└──udevd vda: 3.2 MiB/0 B vda: 116/0 0.00 0.00 135.586ms
```

In the IO (input/output) view, the following columns are displayed:

* `Bytes Read/Written`: the total number of bytes read and written by the cgroup and its children, per each blockdevice
* `ios Read/Write`: the total number of I/O operations read and written by the cgroup and its children, per each blockdevice
* `PressAvg10`: the average IO pressure of the cgroup and its children over the last 10 seconds
* `PressAvg60`: the average IO pressure of the cgroup and its children over the last 60 seconds
* `PressTotal`: the total IO pressure of the cgroup and its children (see [PSI](https://docs.kernel.org/accounting/psi.html#psi) for more information)

### `memory`

```bash
$ talosctl cgroups --preset=memory
NAME MemCurrent MemPeak MemLow Peak/Low MemHigh MemMin Current/Min MemMax
. unset unset unset unset% unset unset unset% unset
├──init 133 MiB 133 MiB 192 MiB 69.18% max 96 MiB 138.35% max
├──kubepods 494 MiB 505 MiB 0 B max% max 0 B max% 1.4 GiB
│ ├──besteffort 70 MiB 74 MiB 0 B max% max 0 B max% max
│ │ └──kube-system/kube-proxy-6r5bz 70 MiB 74 MiB 0 B max% max 0 B max% max
│ │ ├──kube-proxy 69 MiB 73 MiB 0 B max% max 0 B max% max
│ │ └──sandbox 872 KiB 2.2 MiB 0 B max% max 0 B max% max
│ └──burstable 424 MiB 435 MiB 0 B max% max 0 B max% max
│ ├──kube-system/kube-apiserver-talos-default-controlplane-1 233 MiB 242 MiB 0 B max% max 0 B max% max
│ │ ├──kube-apiserver 232 MiB 242 MiB 0 B max% max 0 B max% max
│ │ └──sandbox 208 KiB 3.3 MiB 0 B max% max 0 B max% max
│ ├──kube-system/kube-controller-manager-talos-default-controlplane-1 78 MiB 80 MiB 0 B max% max 0 B max% max
│ │ ├──kube-controller-manager 78 MiB 80 MiB 0 B max% max 0 B max% max
│ │ └──sandbox 212 KiB 3.3 MiB 0 B max% max 0 B max% max
│ ├──kube-system/kube-flannel-jzx6m 48 MiB 50 MiB 0 B max% max 0 B max% max
│ │ ├──kube-flannel 46 MiB 48 MiB 0 B max% max 0 B max% max
│ │ └──sandbox 216 KiB 3.1 MiB 0 B max% max 0 B max% max
│ └──kube-system/kube-scheduler-talos-default-controlplane-1 66 MiB 67 MiB 0 B max% max 0 B max% max
│ ├──kube-scheduler 66 MiB 67 MiB 0 B max% max 0 B max% max
│ └──sandbox 208 KiB 3.4 MiB 0 B max% max 0 B max% max
├──podruntime 549 MiB 647 MiB 0 B max% max 0 B max% max
│ ├──etcd 382 MiB 482 MiB 256 MiB 188.33% max 0 B max% max
│ ├──kubelet 103 MiB 104 MiB 192 MiB 54.31% max 96 MiB 107.57% max
│ └──runtime 64 MiB 71 MiB 392 MiB 18.02% max 196 MiB 32.61% max
└──system 229 MiB 232 MiB 192 MiB 120.99% max 96 MiB 239.00% max
├──apid 26 MiB 28 MiB 32 MiB 88.72% max 16 MiB 159.23% 40 MiB
├──dashboard 113 MiB 113 MiB 0 B max% max 0 B max% 196 MiB
├──runtime 74 MiB 77 MiB 96 MiB 79.89% max 48 MiB 154.57% max
├──trustd 10 MiB 11 MiB 16 MiB 69.85% max 8.0 MiB 127.78% 24 MiB
└──udevd 6.8 MiB 14 MiB 16 MiB 86.87% max 8.0 MiB 84.67% max
```

In the memory view, the following columns are displayed:

* `MemCurrent`: the current memory usage of the cgroup and its children
* `MemPeak`: the peak memory usage of the cgroup and its children
* `MemLow`: the low memory reservation of the cgroup
* `Peak/Low`: the ratio of the peak memory usage to the low memory reservation
* `MemHigh`: the high memory limit of the cgroup
* `MemMin`: the minimum memory reservation of the cgroup
* `Current/Min`: the ratio of the current memory usage to the minimum memory reservation
* `MemMax`: the maximum memory limit of the cgroup

### `swap`

```bash
$ talosctl cgroups --preset=swap
NAME SwapCurrent SwapPeak SwapHigh SwapMax
. unset unset unset unset
├──init 0 B 0 B max max
├──kubepods 0 B 0 B max max
│ ├──besteffort 0 B 0 B max max
│ │ └──kube-system/kube-proxy-6r5bz 0 B 0 B max max
│ │ ├──kube-proxy 0 B 0 B max 0 B
│ │ └──sandbox 0 B 0 B max max
│ └──burstable 0 B 0 B max max
│ ├──kube-system/kube-apiserver-talos-default-controlplane-1 0 B 0 B max max
│ │ ├──kube-apiserver 0 B 0 B max 0 B
│ │ └──sandbox 0 B 0 B max max
│ ├──kube-system/kube-controller-manager-talos-default-controlplane-1 0 B 0 B max max
│ │ ├──kube-controller-manager 0 B 0 B max 0 B
│ │ └──sandbox 0 B 0 B max max
│ ├──kube-system/kube-flannel-jzx6m 0 B 0 B max max
│ │ ├──kube-flannel 0 B 0 B max 0 B
│ │ └──sandbox 0 B 0 B max max
│ └──kube-system/kube-scheduler-talos-default-controlplane-1 0 B 0 B max max
│ ├──kube-scheduler 0 B 0 B max 0 B
│ └──sandbox 0 B 0 B max max
├──podruntime 0 B 0 B max max
│ ├──etcd 0 B 0 B max max
│ ├──kubelet 0 B 0 B max max
│ └──runtime 0 B 0 B max max
└──system 0 B 0 B max max
├──apid 0 B 0 B max max
├──dashboard 0 B 0 B max max
├──runtime 0 B 0 B max max
├──trustd 0 B 0 B max max
└──udevd 0 B 0 B max max
```

In the swap view, the following columns are displayed:

* `SwapCurrent`: the current swap usage of the cgroup and its children
* `SwapPeak`: the peak swap usage of the cgroup and its children
* `SwapHigh`: the high swap limit of the cgroup
* `SwapMax`: the maximum swap limit of the cgroup

## Custom Schemas

The `talosctl cgroups` command allows you to define custom schemas to display the cgroups information in a specific way.
The schema is defined in a YAML file with the following structure:

```yaml
columns:
  - name: Bytes Read/Written
    template: '{{ range $disk, $v := .IOStat }}{{ if $v }}{{ $disk }}: {{ $v.rbytes.HumanizeIBytes }}/{{ $v.wbytes.HumanizeIBytes }} {{ end }}{{ end }}'
  - name: ios Read/Write
    template: '{{ if .Parent }}{{ range $disk, $v := .IOStat }}{{ $disk }}: {{ $v.rios }}/{{ $v.wios }} {{ end }}{{ end }}'
  - name: PressAvg10
    template: '{{ .IOPressure.some.avg10 | printf "%6s" }}'
  - name: PressAvg60
    template: '{{ .IOPressure.some.avg60 | printf "%6s" }}'
  - name: PressTotal
    template: '{{ .IOPressure.some.total.UsecToDuration | printf "%12s" }}'
```

The schema file can be passed to the `talosctl cgroups` command with the `--schema-file` flag:

```bash
talosctl cgroups --schema-file=schema.yaml
```

In the schema, for each column, you can define a `name` and a `template`, which is a Go template that will be executed with the cgroups data.
In the template, there's a `.` variable that contains the cgroups data, and a `.Parent` variable which is the parent cgroup (if available).
Each cgroup node contains information parsed from the cgroup filesystem, with field names matching the filenames adjusted for Go naming conventions, e.g. `io.stat` becomes `.IOStat` in the template.
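
As a minimal illustration, a one-column schema might look like this (the field name `MemoryCurrent` is inferred from that convention for `memory.current`, and the `HumanizeIBytes` helper is borrowed from the preset templates above; treat both as assumptions):

```yaml
# sketch: a single-column schema showing current memory usage per cgroup
columns:
  - name: MemCurrent
    template: '{{ .MemoryCurrent.HumanizeIBytes }}'
```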

The schemas for the presets above can be found in the [source code](https://github.com/siderolabs/talos/tree/main/cmd/talosctl/cmd/talos/cgroupsprinter/schemas).
95 website/content/v1.12/advanced/customizing-the-kernel.md Normal file
@@ -0,0 +1,95 @@
---
title: "Customizing the Kernel"
description: "Guide on how to customize the kernel used by Talos Linux."
aliases:
  - ../guides/customizing-the-kernel
---

Talos Linux configures the kernel to allow loading only cryptographically signed modules.
The signing key is generated during the build process; it is unique to each build, and it is not available to the user.
The public key is embedded in the kernel and is used to verify the signature of the modules.
So if you want to use a custom kernel module, you will need to build your own kernel, plus all required kernel modules, in order to keep the signature in sync with the kernel.

## Overview

In order to build a custom kernel (or a custom kernel module), the following steps are required:

- build a new Linux kernel and modules, and push the artifacts to a registry
- build new Talos base artifacts: the kernel and initramfs images
- produce a new Talos boot artifact (ISO, installer image, disk image, etc.)

We will go through each step in detail.

## Building a Custom Kernel

First, you might need to prepare the build environment; follow the [Building Custom Images]({{< relref "building-images" >}}) guide.

Check out the [`siderolabs/pkgs`](https://github.com/siderolabs/pkgs) repository:

```shell
git clone https://github.com/siderolabs/pkgs.git
cd pkgs
git checkout {{< release_branch >}}
```

The kernel configuration is located in the `kernel/build/config-ARCH` files.
It can be modified using a text editor, or by using the Linux kernel `menuconfig` tool:

```shell
make kernel-menuconfig
```

The kernel configuration can be cleaned up by running:

```shell
make kernel-olddefconfig
```

Both commands will write the new configuration back to the `kernel/build/config-ARCH` files.
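
Before building, it can be worth reviewing what actually changed; a plain `git diff` works, assuming the config files are tracked in the repository checkout:

```shell
# review the kernel config changes before building
git diff kernel/build/
```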

Once ready, build the kernel and any out-of-tree modules (if required, e.g. `zfs`) and push the artifacts to a registry:

```shell
make kernel REGISTRY=127.0.0.1:5005 PUSH=true
```

By default, this command will compile and push the kernel for both the `amd64` and `arm64` architectures, but you can specify a single architecture by overriding the `PLATFORM` variable:

```shell
make kernel REGISTRY=127.0.0.1:5005 PUSH=true PLATFORM=linux/amd64
```

This will create a container image `127.0.0.1:5005/siderolabs/kernel:$TAG` with the kernel and modules.

## Building Talos Base Artifacts

Follow the [Building Custom Images]({{< relref "building-images" >}}) guide to set up the Talos source code checkout.

If some new kernel modules were introduced, adjust the list of the default modules compiled into the Talos `initramfs` by editing the file `hack/modules-ARCH.txt`.

Try building the base Talos artifacts:

```shell
make kernel initramfs PKG_KERNEL=127.0.0.1:5005/siderolabs/kernel:$TAG PLATFORM=linux/amd64
```

This should create new kernel and initramfs images in `_out/vmlinuz-amd64` and `_out/initramfs-amd64.xz` respectively.

> Note: if building for `arm64`, replace `amd64` with `arm64` in the commands above.

As a final step, produce the new `imager` container image which can generate Talos boot assets:

```shell
make imager PKG_KERNEL=127.0.0.1:5005/siderolabs/kernel:$TAG PLATFORM=linux/amd64 INSTALLER_ARCH=targetarch
```

> Note: if you built the kernel for both `amd64` and `arm64`, a multi-arch `imager` container can be built as well by specifying `INSTALLER_ARCH=all` and `PLATFORM=linux/amd64,linux/arm64`.

## Building Talos Boot Assets

Follow the [Boot Assets]({{< relref "../talos-guides/install/boot-assets" >}}) guide to build the Talos boot assets you might need to boot Talos: the ISO, the `installer` image, etc.
Replace the reference to the `imager` in the guide with the reference to the `imager` container built above.

> Note: if you update the `imager` container, don't forget to `docker pull` it, as `docker` caches pulled images and won't pull the updated image automatically.
441 website/content/v1.12/advanced/developing-talos.md Normal file
@@ -0,0 +1,441 @@
---
title: "Developing Talos"
description: "Learn how to set up a development environment for local testing and hacking on Talos itself!"
aliases:
  - ../learn-more/developing-talos
---

This guide outlines steps and tricks to develop the Talos operating system and related components.
The guide assumes macOS or a Linux operating system on the development host.

## Prepare

Check out the [Talos repository](https://github.com/siderolabs/talos).

Try running `make help` to see the available `make` commands.
You will need Docker and `buildx` installed on the host.
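
A quick sanity check that the required tooling is in place (both commands print version information when the tools are installed):

```bash
# verify Docker and the buildx plugin are available
docker version
docker buildx version
```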

> Note: Usually it is better to install up-to-date Docker from the Docker apt repositories, e.g. following the [Ubuntu instructions](https://docs.docker.com/engine/install/ubuntu/).
>
> If the `buildx` plugin is not available with the OS docker packages, it can be installed [as a plugin from GitHub releases](https://docs.docker.com/buildx/working-with-buildx/#install).

Set up a builder with access to the host network:

```bash
docker buildx create --driver docker-container --driver-opt network=host --name local1 --buildkitd-flags '--allow-insecure-entitlement security.insecure' --use
```

> Note: `network=host` allows the buildx builder to access the host network, so that it can push to a local container registry (see below).

Make sure the following steps work:

- `make talosctl`
- `make initramfs kernel`

Set up a local docker registry:

```bash
docker run -d -p 5005:5000 \
    --restart always \
    --name local registry:2
```

Try building an installer image and pushing it to the local registry:

```bash
make installer-base IMAGE_REGISTRY=127.0.0.1:5005 PUSH=true
make imager IMAGE_REGISTRY=127.0.0.1:5005 PUSH=true INSTALLER_ARCH=targetarch
make installer IMAGE_REGISTRY=127.0.0.1:5005 PUSH=true
```

Record the image name output in the step above.

> Note: it is also possible to force a stable image tag by using the `TAG` variable: `make installer-base IMAGE_REGISTRY=127.0.0.1:5005 TAG=v1.0.0-alpha.1 PUSH=true`.

## Running Talos cluster

Set up local caching docker registries (this speeds up Talos cluster boot a lot); the script is in the Talos repo:

```bash
bash hack/start-registry-proxies.sh
```

Start your local cluster with:

```bash
sudo --preserve-env=HOME _out/talosctl-<YOUR FLAVOR> cluster create \
    --provisioner=qemu \
    --cidr=172.20.0.0/24 \
    --registry-mirror docker.io=http://172.20.0.1:5000 \
    --registry-mirror registry.k8s.io=http://172.20.0.1:5001 \
    --registry-mirror gcr.io=http://172.20.0.1:5003 \
    --registry-mirror ghcr.io=http://172.20.0.1:5004 \
    --registry-mirror 127.0.0.1:5005=http://172.20.0.1:5005 \
    --install-image=127.0.0.1:5005/siderolabs/installer:<RECORDED HASH from the build step> \
    --controlplanes 3 \
    --workers 2 \
    --with-bootloader=false
```

- `--provisioner` selects QEMU vs. the default Docker
- a custom `--cidr` makes the QEMU cluster use a different network than the default Docker setup (optional)
- `--registry-mirror` uses the caching proxies set up above to speed up boot time a lot; the last one adds your local registry (the installer image was pushed to it)
- `--install-image` is the image you built with `make installer` above
- `--controlplanes` & `--workers` configure the cluster size; choose to match your resources; 3 control planes give you an HA control plane; 1 control plane is enough; never use 2 control planes
- `--with-bootloader=false` disables boot from disk (Talos will always boot from `_out/vmlinuz-<ARCH>` and `_out/initramfs-<ARCH>.xz`).
  This speeds up the development cycle a lot - no need to rebuild the installer and perform an install; rebooting is enough to pick up new code changes.

> Note: when configuration changes are introduced and the old installer doesn't validate the config, or when the installation flow itself is being worked on, `--with-bootloader=false` should not be used.
>
> `talosctl cluster create` derives the Talos machine configuration version from the install image tag, so sometimes early in the development cycle (when the new minor tag is not released yet), the machine config version can be overridden with `--talos-version={{< version >}}`.

## Console Logs

Watching console logs is easy with `tail`:

```bash
tail -F ~/.talos/clusters/talos-default/talos-default-*.log
```

## Interacting with Talos

Once `talosctl cluster create` finishes successfully, `talosconfig` and `kubeconfig` will be set up automatically to point to your cluster.

Start playing with `talosctl`:

```bash
talosctl -n 172.20.0.2 version
talosctl -n 172.20.0.3,172.20.0.4 dashboard
```
|
||||
talosctl -n 172.20.0.4 get members
|
||||
```
|
||||
|
||||
Same with `kubectl`:
|
||||
|
||||
```bash
|
||||
kubectl get nodes -o wide
|
||||
```
|
||||
|
||||
You can deploy some Kubernetes workloads to the cluster.
|
||||
|
||||
You can edit machine config on the fly with `talosctl edit mc --immediate`, config patches can be applied via `--config-patch` flags, also many features have specific flags in `talosctl cluster create`.
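
For instance, a quick sketch of both approaches (the kubelet patch below is purely illustrative):

```bash
# edit the machine config of a single node interactively, applied immediately without a reboot
talosctl -n 172.20.0.2 edit mc --immediate

# apply an illustrative patch at cluster creation time
sudo --preserve-env=HOME _out/talosctl-linux-amd64 cluster create ... \
    --config-patch '[{"op": "add", "path": "/machine/kubelet/extraArgs", "value": {"v": "4"}}]'
```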

## Quick Reboot

To reboot the whole cluster quickly (e.g. to pick up a change made in the code):

```bash
for socket in ~/.talos/clusters/talos-default/talos-default-*.monitor; do echo "q" | sudo socat - unix-connect:$socket; done
```

Sending `q` to a single socket reboots a single node.
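
For example, assuming the default QEMU node naming, rebooting only the first control plane node would look like:

```bash
echo "q" | sudo socat - unix-connect:$HOME/.talos/clusters/talos-default/talos-default-controlplane-1.monitor
```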

> Note: This command performs an immediate reboot (as if the machine was powered down and immediately powered back up); for a normal Talos reboot use `talosctl reboot`.

## Development Cycle

Fast development cycle:

- bring up a cluster
- make code changes
- rebuild `initramfs` with `make initramfs`
- reboot a node to pick up the new `initramfs`
- verify code changes
- more code changes...

Some aspects of Talos development require the bootloader to be enabled (when working on the `installer` itself); in that case the quick development cycle is no longer possible, and the cluster should be destroyed and recreated each time.

## Running Integration Tests

If the integration tests were changed (or when running them for the first time), first rebuild the integration test binary:

```bash
rm -f _out/integration-test-linux-amd64; make _out/integration-test-linux-amd64
```

Running short tests against a QEMU-provisioned cluster:

```bash
_out/integration-test-linux-amd64 \
    -talos.provisioner=qemu \
    -test.v \
    -test.short \
    -talos.talosctlpath=$PWD/_out/talosctl-linux-amd64
```

The whole test suite can be run by removing the `-test.short` flag.

Specific tests can be run with `-test.run=TestIntegration/api.ResetSuite`.
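
For example, running a single suite against the QEMU cluster:

```bash
_out/integration-test-linux-amd64 \
    -talos.provisioner=qemu \
    -test.v \
    -test.run=TestIntegration/api.ResetSuite \
    -talos.talosctlpath=$PWD/_out/talosctl-linux-amd64
```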

## Build Flavors

`make <something> WITH_RACE=1` enables the Go race detector; Talos runs slower and uses more memory, but data races are detected.

`make <something> WITH_DEBUG=1` enables Go profiling and other debug features, useful for local development.

`make initramfs WITH_DEBUG_SHELL=true` adds bash and minimal utilities for debugging purposes.
Combine it with the `--with-debug-shell` flag when creating the cluster to obtain shell access.
This is uncommonly used, as in this case the bash shell runs in place of `machined`.

## Destroying Cluster

```bash
sudo --preserve-env=HOME ../talos/_out/talosctl-linux-amd64 cluster destroy --provisioner=qemu
```

This command stops QEMU and helper processes, tears down the bridged network on the host, and cleans up
cluster state in `~/.talos/clusters`.

> Note: if the host machine is rebooted, QEMU instances and helper processes won't be started back.
> In that case it's required to clean up the files in the `~/.talos/clusters/<cluster-name>` directory manually.

## Optional

Set up a cross-build environment with:

```bash
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
```

> Note: the static qemu binaries which come with Ubuntu 21.10 seem to be broken.

## Unit tests

Unit tests can be run in buildx with `make unit-tests`; on Ubuntu systems some tests using `loop` devices will fail, because Ubuntu uses low-index `loop` devices for snaps.

Most of the unit tests can be run standalone as well, with regular `go test`, or using IDE integration:

```bash
go test -v ./internal/pkg/circular/
```

This provides a much faster feedback loop, but some tests require either elevated privileges (running as `root`) or additional binaries available only in the Talos `rootfs` (containerd tests).

Running tests as root can be done with the `-exec` flag to `go test`, but this is risky, as the test code has root access and can potentially make undesired changes:

```bash
go test -exec sudo -v ./internal/app/machined/pkg/controllers/network/...
```

## Go Profiling

Build `initramfs` with debug enabled: `make initramfs WITH_DEBUG=1`.

Launch the Talos cluster with the bootloader disabled, and use `go tool pprof` to capture and inspect the profile:

```bash
go tool pprof http://172.20.0.2:9982/debug/pprof/heap
```

The IP address `172.20.0.2` is the address of the Talos node, and the port (`:9982`) depends on the Go application to profile:

- 9981: `apid`
- 9982: `machined`
- 9983: `trustd`
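
For example, to capture a heap profile from `apid` and browse it in the pprof web UI (the `-http` flag is standard `go tool pprof`; the local port is arbitrary):

```bash
go tool pprof -http=:8080 http://172.20.0.2:9981/debug/pprof/heap
```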

## Testing Air-gapped Environments

There is a hidden `talosctl debug air-gapped` command which launches two components:

- an HTTP proxy capable of proxying HTTP and HTTPS requests
- an HTTPS server with a self-signed certificate

The command also writes out a Talos machine configuration patch to enable the HTTP proxy and add the self-signed certificate
to the list of trusted certificates:

```shell
$ talosctl debug air-gapped --advertised-address 172.20.0.1
2022/08/04 16:43:14 writing config patch to air-gapped-patch.yaml
2022/08/04 16:43:14 starting HTTP proxy on :8002
2022/08/04 16:43:14 starting HTTPS server with self-signed cert on :8001
```

The `--advertised-address` should match the bridge IP of the Talos node.

The generated machine configuration patch looks like:

```yaml
machine:
  env:
    http_proxy: http://172.20.0.1:8002
    https_proxy: http://172.20.0.1:8002
    no_proxy: 172.20.0.1/24
cluster:
  extraManifests:
    - https://172.20.0.1:8001/debug.yaml
---
apiVersion: v1alpha1
kind: TrustedRootsConfig
name: air-gapped-ca
certificates: |
  -----BEGIN CERTIFICATE-----
  MIIBiTCCAS+gAwIBAgIBATAKBggqhkjOPQQDAjAUMRIwEAYDVQQKEwlUZXN0IE9u
  bHkwHhcNMjUwMTE1MTE1OTI3WhcNMjUwMTE2MTE1OTI3WjAUMRIwEAYDVQQKEwlU
  ZXN0IE9ubHkwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAAReznBeEcQFcB/y1yqI
  HQcP0IWBMvgwGTeaaTBM6rV+AjbnyxgCrXAnmJ0t45Eur27eW9J/1T5tzA6fe24f
  YyY9o3IwcDAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsG
  AQUFBwMCMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFEGBbafXsyzxVhVqfjzy
  7aBmVvtaMA8GA1UdEQQIMAaHBKwUAAEwCgYIKoZIzj0EAwIDSAAwRQIhAPAFm6Lv
  1Bw+M55Z1SEDLyILJSS0En5F6n8Q9LyGGT4fAiBi+Fm3wSQcvgGPG9OfokFaXmGp
  Pa6c4ZrarKO8ZxWigA==
  -----END CERTIFICATE-----
```

The first document sets up the HTTP proxy for the machine (in-cluster traffic is excluded from the proxy)
and adds an extra Kubernetes manifest hosted on the HTTPS server.
The second document appends the self-signed certificate of the HTTPS server to the list of trusted certificates.

The machine configuration patch can now be used to launch a test Talos cluster:

```shell
talosctl cluster create ... --config-patch @air-gapped-patch.yaml
```

The following lines should appear in the output of the `talosctl debug air-gapped` command:

- `CONNECT discovery.talos.dev:443`: the HTTP proxy is used to talk to the discovery service
- `http: TLS handshake error from 172.20.0.2:53512: remote error: tls: bad certificate`: an expected error on the Talos side, as the self-signed cert is not yet written to the file
- `GET /debug.yaml`: Talos successfully fetches the extra manifest

There might be more output depending on whether the registry caches are being used or not.

## Running Upgrade Integration Tests

Talos has a separate set of provision upgrade tests, which create a cluster on older versions of Talos, perform an upgrade,
and verify that the cluster is still functional.

Build the test binary:

```bash
rm -f _out/integration-test-provision-linux-amd64; make _out/integration-test-provision-linux-amd64
```

Prepare the test artifacts for the upgrade test:

```bash
make release-artifacts
```

Build and push an installer image for the development version of Talos:

```bash
make installer-base IMAGE_REGISTRY=127.0.0.1:5005 PUSH=true
make imager IMAGE_REGISTRY=127.0.0.1:5005 PUSH=true
make installer IMAGE_REGISTRY=127.0.0.1:5005
```

Run the tests (the tests will create the cluster on the older version of Talos, perform an upgrade, and verify that the cluster is still functional):

```bash
sudo --preserve-env=HOME _out/integration-test-provision-linux-amd64 \
    -test.v \
    -talos.talosctlpath _out/talosctl-linux-amd64 \
    -talos.provision.target-installer-registry=127.0.0.1:5005 \
    -talos.provision.registry-mirror 127.0.0.1:5005=http://172.20.0.1:5005,docker.io=http://172.20.0.1:5000,registry.k8s.io=http://172.20.0.1:5001,quay.io=http://172.20.0.1:5002,gcr.io=http://172.20.0.1:5003,ghcr.io=http://172.20.0.1:5004 \
    -talos.provision.cidr 172.20.0.0/24
```

## SELinux policy debugging and development

Here are some tips about how the Talos SELinux policy is built, which should mainly help developers troubleshoot denials and assess policy rules for security against different threats.

### Obtaining and processing denial logs

If SELinux has blocked some event from happening, it will log it to the audit log.
If the mode is permissive, the only implication would be a denial message, so permissive mode is useful for prototyping the policy.
You can check the logs with:

`talosctl --nodes 172.20.0.2 logs auditd > audit.log`

The obtained logs can be processed with `audit2allow` to obtain CIL code that would allow the denied event to happen, alongside an explanation of the denial.
For this we use the SELinux userspace utilities, which can be run in a container in case you use a Linux system without SELinux or another OS.
Some of the useful commands are:

```bash
audit2why -p ./internal/pkg/selinux/policy/policy.33 -i audit.log
audit2allow -C -e -p ./internal/pkg/selinux/policy/policy.33 -i audit.log
```

However, please do not consider the output of `audit2allow` as a final modification for the policy.
It is a good starting point to understand the denial, but the generated code should be reviewed and correctly reformulated once confirmed to be needed and not caused by mislabeling.

### Iterating on the policy

`make generate` generates the compiled SELinux files.
However, if you want to iterate on the policy rapidly, you might want to consider only rebuilding the policy during testing:

```bash
make local-selinux-generate DEST=./internal/pkg/selinux PLATFORM=linux/amd64 PROGRESS=plain
```

### Debugging locally with many denials happening

Sometimes, e.g. during a major refactor, the policy can be broken and many denials can happen.
This can cause the audit ring buffer to fill up, losing some messages.
These are some kernel cmdline parameters that redirect the audit logs to the console, which is saved to your development cluster directory:

`talos.auditd.disabled=1 audit=1 audit_backlog_limit=65535 debug=1 sysctl.kernel.printk_ratelimit=0 sysctl.kernel.printk_delay=0 sysctl.kernel.printk_ratelimit_burst=10000`
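
A sketch of passing these parameters to a development cluster, assuming the QEMU provisioner's `--extra-boot-kernel-args` flag:

```bash
sudo --preserve-env=HOME _out/talosctl-linux-amd64 cluster create ... \
    --extra-boot-kernel-args="talos.auditd.disabled=1 audit=1 audit_backlog_limit=65535 debug=1 sysctl.kernel.printk_ratelimit=0 sysctl.kernel.printk_delay=0 sysctl.kernel.printk_ratelimit_burst=10000"
```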

### SELinux policy structure

The SELinux policy is built using the CIL language.
The CIL files are located in `internal/pkg/selinux/policy/selinux` and are compiled into a binary format (e.g. `33` for the current kernel policy format version) using the `secilc` tool from the Talos tools bundle.
The policy is embedded into the initramfs init and loaded early in the boot process.

For understanding and modifying the policy, the [CIL language reference](https://github.com/SELinuxProject/selinux-notebook/blob/dfabf5f1bcdc72e440c1f7010e39ae3ce9f0c364/src/notebook-examples/selinux-policy/cil/CIL_Reference_Guide.pdf) is a recommended starting point to get familiar with the language.
[Object Classes and Permissions](https://github.com/SELinuxProject/selinux-notebook/blob/dfabf5f1bcdc72e440c1f7010e39ae3ce9f0c364/src/object_classes_permissions.md) is another helpful document, listing all SELinux entities and the meaning of all the permissions.

The policy directory contains the following main subdirectories:

- `immutable`: contains the preamble parts, mostly listing SELinux SIDs, classes, policy capabilities and roles, not expected to change frequently.
- `common`: abstractions and common rules, which are used by the other parts of the policy or by all objects of some kind:
  - `classmaps`: contains class maps, which are a SELinux concept for easily configuring the same list of permissions on a list of classes.
    Our policy frequently uses the `fs_classes` classmap for enabling a group of file operations on all types of files.
  - `files`: labels for common system files, stored on squashfs.
    Mostly used for generalized labels not related to a particular service.
  - `network`: rules that allow basically any network activity, as Talos does not currently use SELinux features like IPsec labeling for network security.
  - `typeattributes`: this file contains typeattributes, which are a SELinux concept for grouping types together to have the same rules applied to all of them.
    This file also contains macros used to assign objects into typeattributes.
    When such a macro exists, its use is recommended over using the typeattribute directly, as it allows for grepping for the macro call.
  - `processes`: common rules, applied to all processes or to a typeattribute of processes.
    We only add rules that apply widely here, with more specific rules being added to the service policy files.
- `services`: policy files for each service.
  These files contain the definitions and rules that are specific to the service, like allowing access to its configuration files or communicating over sockets.
  Some specific parts that are not a service in Talos terms are:
  - `selinux` - selinuxfs rules protecting SELinux settings from modifications after the OS has started.
  - `system-containerd` - a containerd instance used for `apid` and similar services internal to Talos.
  - `system-containers` - `apid`, `trustd`, `etcd` and other system services, running in the system containerd instance.

#### classmaps overview

- `fs_classes` - contains file classes and their permissions, used for file operations.
  - `rw` - all operations, except SELinux label management.
  - `ro` - read-only operations.
  - others - just a class permission applied to all supported file classes.
- `netlink_classes (full)` - full (except security labels) access to all netlink socket classes.
- `process_classes` - helpers to allow a wide range of process operations.
  - `full` - all operations, except ptrace (considered to be a rare requirement, so it should be added specifically where needed).
  - `signal` - send any signal to the target process.

#### typeattributes overview

- Processes:
  - `service_p` - system services.
  - `system_container_p` - containerized system services.
  - `pod_p` - Kubernetes pods.
  - `system_p` - kernel, init, system services (not containerized).
  - `any_p` - any process registered with SELinux.
  - Service-specific types and typeattributes in service policy files.
- Files:
  - `common_f` - world-rw files, which can be accessed by any process.
  - `protected_f` - mostly files used by specific services, not accessible by other processes (except e.g. `machined`).
  - `system_f` - files and directories used by the system services, also generally to be specified by precise type and not typeattribute.
  - `system_socket_f` - sockets used for communication between system services, not accessible by workload processes.
  - `device_f`:
    - `common_device_f` - devices not considered protected, like GPUs.
    - `protected_device_f` - protected devices like TPM, watchdog timers.
  - `any_f` - any file registered with SELinux.
  - `filesystem_f` - filesystems, generally used for allowing mount operations.
  - `service_exec_f` - system service executable files.
  - Service-specific types and typeattributes in service policy files.
- General:
  - `any_f_any_p` - any file or any process, the widest typeattribute.

148 website/content/v1.12/advanced/disaster-recovery.md Normal file
@ -0,0 +1,148 @@

---
title: "Disaster Recovery"
description: "Procedure for snapshotting etcd database and recovering from catastrophic control plane failure."
aliases:
  - ../guides/disaster-recovery
---

The `etcd` database backs the Kubernetes control plane state, so if the `etcd` service is unavailable,
the Kubernetes control plane goes down, and the cluster is not recoverable until `etcd` is recovered.
`etcd` is built around the Raft consensus protocol, so highly-available control plane clusters can tolerate the loss of nodes so long as more than half of the members are running and reachable.
For a three control plane node Talos cluster, this means that the cluster tolerates a failure of any single node,
but losing more than one node at the same time leads to complete loss of service.
Because of that, it is important to take routine backups of `etcd` state to have a snapshot to recover the cluster from
in case of catastrophic failure.

## Backup

### Snapshotting `etcd` Database

Create a consistent snapshot of the `etcd` database with the `talosctl etcd snapshot` command:

```bash
$ talosctl -n <IP> etcd snapshot db.snapshot
etcd snapshot saved to "db.snapshot" (2015264 bytes)
snapshot info: hash c25fd181, revision 4193, total keys 1287, total size 3035136
```

> Note: the filename `db.snapshot` is arbitrary.

This database snapshot can be taken on any healthy control plane node (with IP address `<IP>` in the example above),
as all `etcd` instances contain exactly the same data.
It is recommended to configure `etcd` snapshots to be created on some schedule to allow point-in-time recovery using the latest snapshot.
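
As a sketch, a crontab entry on an operator machine could take a snapshot every six hours, assuming `talosctl` is on the `PATH` with a valid `talosconfig` for the cluster (the schedule and backup path are illustrative):

```bash
# m h dom mon dow  command  (note: % must be escaped in crontab entries)
0 */6 * * * talosctl -n 172.20.0.2 etcd snapshot /var/backups/etcd-$(date +\%F-\%H\%M).snapshot
```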

### Disaster Database Snapshot

If the `etcd` cluster is not healthy (for example, if quorum has already been lost), the `talosctl etcd snapshot` command might fail.
In that case, copy the database snapshot directly from the control plane node:

```bash
talosctl -n <IP> cp /var/lib/etcd/member/snap/db .
```

This snapshot might not be fully consistent (if the `etcd` process is running), but it allows
for disaster recovery when the latest regular snapshot is not available.

### Machine Configuration

The machine configuration might be required to recover the node after hardware failure.
Back up the Talos node machine configuration with the command:

```bash
talosctl -n IP get mc v1alpha1 -o yaml | yq eval '.spec' -
```
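
For example, to save the backup to a file for later use (the filename is illustrative):

```bash
talosctl -n IP get mc v1alpha1 -o yaml | yq eval '.spec' - > machine-config-backup.yaml
```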

## Recovery

Before starting the disaster recovery procedure, make sure that the `etcd` cluster can't be recovered:

* get the `etcd` cluster member list on all healthy control plane nodes with the `talosctl -n IP etcd members` command and compare it across all members.
* query `etcd` health across the control plane nodes with `talosctl -n IP service etcd`.

If the quorum can be restored, restoring quorum might be a better strategy than performing the full disaster recovery
procedure.

### Latest Etcd Snapshot

Get hold of the latest `etcd` database snapshot.
If a snapshot is not fresh enough, create a database snapshot (see above), even if the `etcd` cluster is unhealthy.

### Init Node

Make sure that there are no control plane nodes with machine type `init`:

```bash
$ talosctl -n <IP1>,<IP2>,... get machinetype
NODE         NAMESPACE   TYPE          ID             VERSION   TYPE
172.20.0.2   config      MachineType   machine-type   2         controlplane
172.20.0.4   config      MachineType   machine-type   2         controlplane
172.20.0.3   config      MachineType   machine-type   2         controlplane
```

The `init` node type is deprecated and is incompatible with the `etcd` recovery procedure.
An `init` node can be converted to the `controlplane` type with the `talosctl edit mc --mode=staged` command, followed
by a node reboot with the `talosctl reboot` command.

### Preparing Control Plane Nodes

If some control plane nodes experienced hardware failure, replace them with new nodes.

Use the machine configuration backup to re-create the nodes with the same secret material and control plane settings
to allow workers to join the recovered control plane.

If a control plane node is up but `etcd` isn't, wipe the node's [EPHEMERAL]({{< relref "../learn-more/architecture/#file-system-partitions" >}}) partition to remove the `etcd`
data directory (make sure a database snapshot is taken before doing this):

```bash
talosctl -n <IP> reset --graceful=false --reboot --system-labels-to-wipe=EPHEMERAL
```

At this point, all control plane nodes should boot up, and the `etcd` service should be in the `Preparing` state.

The Kubernetes control plane endpoint should be pointed to the new control plane nodes if there were
changes to the node addresses.

### Recovering from the Backup

Make sure all `etcd` service instances are in the `Preparing` state:

```bash
$ talosctl -n <IP> service etcd
NODE     172.20.0.2
ID       etcd
STATE    Preparing
HEALTH   ?
EVENTS   [Preparing]: Running pre state (17s ago)
         [Waiting]: Waiting for service "cri" to be "up", time sync (18s ago)
         [Waiting]: Waiting for service "cri" to be "up", service "networkd" to be "up", time sync (20s ago)
```

Execute the bootstrap command against any control plane node, passing the path to the `etcd` database snapshot:

```bash
$ talosctl -n <IP> bootstrap --recover-from=./db.snapshot
recovering from snapshot "./db.snapshot": hash c25fd181, revision 4193, total keys 1287, total size 3035136
```

> Note: if the database snapshot was copied out directly from the `etcd` data directory using `talosctl cp`,
> add the flag `--recover-skip-hash-check` to skip the integrity check on restore.

The Talos node should print matching information in the kernel log:

```log
recovering etcd from snapshot: hash c25fd181, revision 4193, total keys 1287, total size 3035136
{"level":"info","msg":"restoring snapshot","path":"/var/lib/etcd.snapshot","wal-dir":"/var/lib/etcd/member/wal","data-dir":"/var/lib/etcd","snap-dir":"/var/li}
{"level":"info","msg":"restored last compact revision","meta-bucket-name":"meta","meta-bucket-name-key":"finishedCompactRev","restored-compact-revision":3360}
{"level":"info","msg":"added member","cluster-id":"a3390e43eb5274e2","local-member-id":"0","added-peer-id":"eb4f6f534361855e","added-peer-peer-urls":["https:/}
{"level":"info","msg":"restored snapshot","path":"/var/lib/etcd.snapshot","wal-dir":"/var/lib/etcd/member/wal","data-dir":"/var/lib/etcd","snap-dir":"/var/lib/etcd/member/snap"}
```

Now the `etcd` service should become healthy on the bootstrap node, the Kubernetes control plane components
should start, and the control plane endpoint should become available.
The remaining control plane nodes join the `etcd` cluster once the control plane endpoint is up.

## Single Control Plane Node Cluster

This guide applies to single control plane clusters as well.
In fact, it is much more important to take regular snapshots of the `etcd` database in the single control plane node
case, as loss of the control plane node might render the whole cluster irrecoverable without a backup.

44 website/content/v1.12/advanced/egress-domains.md Normal file
@ -0,0 +1,44 @@

---
title: "Egress Domains"
description: "Allowing outbound access for installing Talos"
aliases:
  - ../guides/egress-domains
---

For some more constrained environments, it is important to whitelist only specific domains for outbound internet access.
These rules will need to be updated to allow certain domains if the user wishes to still install and bootstrap Talos from public sources.
That said, users should also note that all of the following components can be mirrored locally with an internal registry, as well as a self-hosted [discovery service](https://github.com/siderolabs/discovery-service) and [image factory](https://github.com/siderolabs/image-factory).

The following list of egress domains was tested using a Fortinet FortiGate Next-Generation Firewall to confirm that Talos was installed, bootstrapped, and Kubernetes was fully up and running.
The FortiGate allows for passing in wildcard domains and will handle resolution of those domains to defined IPs automatically.
All traffic is HTTPS over port 443.

Discovery Service:

- discovery.talos.dev

Image Factory:

- factory.talos.dev
- *.azurefd.net (Azure Front Door for serving cached assets)

Google Container Registry / Google Artifact Registry (GCR/GAR):

- gcr.io
- storage.googleapis.com (backing blob storage for images)
- *.pkg.dev (backing blob storage for images)

GitHub Container Registry (GHCR):

- ghcr.io
- *.githubusercontent.com (backing blob storage for images)

Kubernetes Registry (k8s.io):

- registry.k8s.io
- *.s3.dualstack.us-east-1.amazonaws.com (backing blob storage for images)

> Note: In this testing, DNS and NTP servers were updated to use those services that are built into the FortiGate.
> These may also need to be allowed if the user cannot make use of internal services.
> Additionally, these rules only cover what is required for Talos to be fully installed and running.
> There may be other domains, like docker.io, that must be allowed for non-default CNIs or workload container images.

226 website/content/v1.12/advanced/etcd-maintenance.md Normal file
@ -0,0 +1,226 @@

---
title: "etcd Maintenance"
description: "Operational instructions for etcd database."
---

The `etcd` database backs the Kubernetes control plane state, so `etcd` health is critical for Kubernetes availability.

> Note: Commands from the `talosctl etcd` namespace are functional only on the Talos control plane nodes.
> Each time you see `<IPx>` on this page, it references the IP address of a control plane node.

## Space Quota

The `etcd` database space quota is set to 2 GiB by default.
If the database size exceeds the quota, `etcd` will stop operations until the issue is resolved.

This condition can be checked with the `talosctl etcd alarm list` command:

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP> etcd alarm list
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             ALARM
172.20.0.2   a49c021e76e707db   NOSPACE
{{< /tab >}}
{{< /tabpane >}}

If the Kubernetes database contains lots of resources, the space quota can be increased to match the actual usage.
The recommended maximum size is 8 GiB.

To increase the space quota, edit the `etcd` section in the machine configuration:

```yaml
cluster:
  etcd:
    extraArgs:
      quota-backend-bytes: 4294967296 # 4 GiB
```

Once the node is rebooted with the new configuration, use `talosctl etcd alarm disarm` to clear the `NOSPACE` alarm.
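
For example:

```bash
talosctl -n <IP> etcd alarm disarm
```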

## Defragmentation

The `etcd` database can become fragmented over time if there are lots of writes and deletes.
The Kubernetes API server performs automatic compaction of the `etcd` database, which marks deleted space as free and ready to be reused.
However, the space is not actually freed until the database is defragmented.

If the database is heavily fragmented (the in-use/DB-size ratio is less than 0.5), defragmentation might increase performance.
If the database runs over the space quota (see above), but the actual in-use database size is small, defragmentation is required to bring the on-disk database size below the limit.

The current database size can be checked with the `talosctl etcd status` command:

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1>,<IP2>,<IP3> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE            LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   ERRORS
172.20.0.3   ecebb05b59a776f1   21 MB     6.0 MB (29.08%)   ecebb05b59a776f1   53391        4           53391                false
172.20.0.2   a49c021e76e707db   17 MB     4.5 MB (26.10%)   ecebb05b59a776f1   53391        4           53391                false
172.20.0.4   eb47fb33e59bf0e2   20 MB     5.9 MB (28.96%)   ecebb05b59a776f1   53391        4           53391                false
{{< /tab >}}
{{< /tabpane >}}

If any of the nodes are over the database size quota, alarms will be printed in the `ERRORS` column.

To defragment the database, run the `talosctl etcd defrag` command:

```bash
talosctl -n <IP1> etcd defrag
```

> Note: Defragmentation is a resource-intensive operation, so it is recommended to run it on a single node at a time.
> Defragmentation of a live member blocks the system from reading and writing data while it rebuilds its state.

Once the defragmentation is complete, the database size will closely match the in-use size:

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE             LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   ERRORS
172.20.0.2   a49c021e76e707db   4.5 MB    4.5 MB (100.00%)   ecebb05b59a776f1   56065        4           56065                false
{{< /tab >}}
{{< /tabpane >}}

## Snapshotting

Regular backups of the `etcd` database should be performed to ensure that the cluster can be restored in case of a failure.
This procedure is described in the [disaster recovery]({{< relref "disaster-recovery" >}}) guide.

## Downgrade v3.6 to v3.5

Before beginning, check the `etcd` health and download a snapshot, as described in [disaster recovery]({{< relref "disaster-recovery" >}}).
Should something go wrong with the downgrade, it is possible to use this backup to roll back to the existing `etcd` version.

This example shows how to downgrade `etcd` in a Talos cluster.

### Step 1: Check Downgrade Requirements

Is the cluster healthy and running v3.6.x?

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1>,<IP2>,<IP3> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE            LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   PROTOCOL   STORAGE   ERRORS
172.20.0.4   a2b8a1f794bdd561   3.6 MB    2.2 MB (61.59%)   a49c021e76e707db   4703         2           4703                 false     3.6.4      3.6.0
172.20.0.3   912415ee6ed360c4   3.5 MB    2.2 MB (61.88%)   a49c021e76e707db   4703         2           4703                 false     3.6.4      3.6.0
172.20.0.2   a49c021e76e707db   3.5 MB    2.2 MB (62.06%)   a49c021e76e707db   4703         2           4703                 false     3.6.4      3.6.0
{{< /tab >}}
{{< /tabpane >}}

### Step 2: Download Snapshot

[Download the snapshot backup]({{< relref "disaster-recovery" >}}) to provide a rollback path should any problems occur.

### Step 3: Validate Downgrade

Validate the downgrade target version before enabling the downgrade:

- Only downgrading one minor version at a time is supported, e.g. downgrading from v3.6 to v3.4 isn't allowed.
- Please do not move on to the next step until the validation is successful.

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1> etcd downgrade validate 3.5
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MESSAGE
172.20.0.2   downgrade validate success, cluster version 3.6
{{< /tab >}}
{{< /tabpane >}}

### Step 4: Enable Downgrade

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1> etcd downgrade enable 3.5
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MESSAGE
172.20.0.2   downgrade enable success, cluster version 3.6
{{< /tab >}}
{{< /tabpane >}}

After enabling the downgrade, the cluster will start to operate with the v3.5 protocol, which is the downgrade target version.
In addition, `etcd` will automatically migrate the schema to the downgrade target version, which usually happens very fast.
Confirm the storage version of all servers has been migrated to v3.5 by checking the endpoint status before moving on to the next step.

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1>,<IP2>,<IP3> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE            LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   PROTOCOL   STORAGE   ERRORS
172.20.0.3   912415ee6ed360c4   3.5 MB    1.9 MB (54.92%)   a49c021e76e707db   5152         2           5152                 false     3.6.4      3.5.0
172.20.0.2   a49c021e76e707db   3.5 MB    1.9 MB (54.64%)   a49c021e76e707db   5152         2           5152                 false     3.6.4      3.5.0
172.20.0.4   a2b8a1f794bdd561   3.6 MB    1.9 MB (54.44%)   a49c021e76e707db   5152         2           5152                 false     3.6.4      3.5.0
{{< /tab >}}
{{< /tabpane >}}

> Note: Once the downgrade is enabled, the cluster will remain operating with the v3.5 protocol even if all the servers are still running the v3.6 binary, unless the downgrade is canceled with `talosctl -n <IP1> etcd downgrade cancel`.

### Step 5: Patch Machine Config

Before patching the node, check if its `etcd` is the leader.
We recommend downgrading the leader last.
If the server to be downgraded is the leader, you can avoid some downtime by using `forfeit-leadership` to transfer leadership to another server before stopping this server.

```bash
talosctl -n <IP1> etcd forfeit-leadership
```

Create a file with the patch pointing to the desired `etcd` image:

```yaml
# etcd-patch.yaml
cluster:
  etcd:
    image: gcr.io/etcd-development/etcd:v3.5.22
```

Apply the patch to the machine, keeping the same configuration but with the new `etcd` version.

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1> patch machineconfig --patch @etcd-patch.yaml --mode reboot
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
patched MachineConfigs.config.talos.dev/v1alpha1 at the node 172.20.0.2
Applied configuration with a reboot
{{< /tab >}}
{{< /tabpane >}}

Verify that each member, and then the entire cluster, becomes healthy with the new v3.5 `etcd`:

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1>,<IP2>,<IP3> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE            LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   PROTOCOL   STORAGE   ERRORS
172.20.0.2   a49c021e76e707db   3.5 MB    3.1 MB (88.05%)   a2b8a1f794bdd561   13116        4           13116                false     3.5.22     3.5.0
172.20.0.4   a2b8a1f794bdd561   3.6 MB    3.1 MB (88.12%)   a2b8a1f794bdd561   13116        4           13116                false     3.6.4      3.5.0
172.20.0.3   912415ee6ed360c4   3.5 MB    3.1 MB (88.30%)   a2b8a1f794bdd561   13116        4           13116                false     3.6.4      3.5.0
{{< /tab >}}
{{< /tabpane >}}

### Step 6: Continue on the Remaining Control Plane Nodes

When all members are downgraded, check the health and status of the cluster, and confirm the minor version of all members is v3.5, and the storage version is empty:

{{< tabpane >}}
{{< tab header="Command" lang="Bash" >}}
talosctl -n <IP1>,<IP2>,<IP3> etcd status
{{< /tab >}}
{{< tab header="Output" lang="Console" >}}
NODE         MEMBER             DB SIZE   IN USE             LEADER             RAFT INDEX   RAFT TERM   RAFT APPLIED INDEX   LEARNER   PROTOCOL   STORAGE   ERRORS
172.20.0.2   a49c021e76e707db   4.5 MB    4.5 MB (100.00%)   912415ee6ed360c4   13865        5           13865                false     3.5.22     3.5.0
172.20.0.4   a2b8a1f794bdd561   4.6 MB    4.6 MB (100.00%)   912415ee6ed360c4   13865        5           13865                false     3.5.22     3.5.0
172.20.0.3   912415ee6ed360c4   4.6 MB    4.6 MB (99.64%)    912415ee6ed360c4   13865        5           13865                false     3.5.22     3.5.0
{{< /tab >}}
{{< /tabpane >}}

208 website/content/v1.12/advanced/extension-services.md Normal file
@ -0,0 +1,208 @@

---
title: "Extension Services"
description: "Use extension services in Talos Linux."
aliases:
  - ../learn-more/extension-services
---

Talos provides a way to run additional system services early in the Talos boot process.
Extension services should be included in the Talos root filesystem (e.g. using [system extensions]({{< relref "../talos-guides/configuration/system-extensions" >}})).
Extension services run as privileged containers with an ephemeral root filesystem located in the Talos root filesystem.

Extension services can be used to extend core features of Talos in a way that is not possible via [static pods]({{< relref "../advanced/static-pods" >}}) or
Kubernetes DaemonSets.

Potential extension service use cases:

* storage: Open iSCSI, software RAID, etc.
* networking: BGP FRR, etc.
* platform integration: VMware open VM tools, etc.

## Configuration

On boot, Talos scans the directory `/usr/local/etc/containers` for `*.yaml` files describing the extension services to run.
Format of the extension service config:

```yaml
name: hello-world
container:
  entrypoint: ./hello-world
  environment:
    - XDG_RUNTIME_DIR=/run
  args:
    - -f
  mounts:
    - # OCI Mount Spec
depends:
  - configuration: true
  - service: cri
  - path: /run/machined/machined.sock
  - network:
      - addresses
      - connectivity
      - hostname
      - etcfiles
  - time: true
restart: never|always|untilSuccess
logToConsole: true|false
```

### `name`

The field `name` sets the service name; valid names match `[a-z0-9-_]+`.
The service container root filesystem path is derived from the `name`: `/usr/local/lib/containers/<name>`.
The extension service will be registered as a Talos service under an `ext-<name>` identifier.

### `container`

* `entrypoint` defines the container entrypoint relative to the container root filesystem (`/usr/local/lib/containers/<name>`)
* `environmentFile` (**deprecated**) defines the path to a file containing environment variables; the service waits for the file to
  exist before starting.
  Use `ExtensionServiceConfig` instead.
* `environment` defines the container environment variables.
* `args` defines the additional arguments to pass to the entrypoint
* `mounts` defines the volumes to be mounted into the container root

#### `container.mounts`

The section `mounts` uses the standard OCI spec:

```yaml
- source: /var/log/audit
  destination: /var/log/audit
  type: bind
  options:
    - rshared
    - bind
    - ro
```

All requested directories will be mounted into the extension service container mount namespace.
If the `source` directory doesn't exist in the host filesystem, it will be created (only for writable paths in the Talos root filesystem).

#### `container.security`

The section `security` follows this example:

```yaml
maskedPaths:
  - "/should/be/masked"
readonlyPaths:
  - "/path/that/should/be/readonly"
  - "/another/readonly/path"
writeableRootfs: true
writeableSysfs: true
rootfsPropagation: shared
```

> * The rootfs is readonly by default unless `writeableRootfs: true` is set.
> * The sysfs is readonly by default unless `writeableSysfs: true` is set.
> * Masked paths, if not set, default to the [containerd defaults](https://github.com/containerd/containerd/tree/main/oci/spec.go).
>   Masked paths will be mounted to `/dev/null`.
>   To set empty masked paths use:
>
>   ```yaml
>   container:
>     security:
>       maskedPaths: []
>   ```
>
> * Read-only paths, if not set, default to the [containerd defaults](https://github.com/containerd/containerd/tree/main/oci/spec.go).
>   Read-only paths will be mounted read-only.
>   To set empty read-only paths use:
>
>   ```yaml
>   container:
>     security:
>       readonlyPaths: []
>   ```
>
> * Rootfs propagation is not set by default (container mounts are private).

### `depends`

The `depends` section describes extension service start dependencies: the service will not be started until all dependencies are met.

Available dependencies:

* `service: <name>`: wait for the service `<name>` to be running and healthy
* `path: <path>`: wait for the `<path>` to exist
* `network: [addresses, connectivity, hostname, etcfiles]`: wait for the specified network readiness checks to succeed
* `time: true`: wait for the NTP time sync
* `configuration: true`: wait for an `ExtensionServiceConfig` resource with a name matching the extension name to be available.
  The mounts specified in the `ExtensionServiceConfig` will be added as extra mounts to the extension service (see the sketch below).
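
A minimal sketch of such a resource (the name must match the extension name; the environment variable and config file contents are illustrative):

```yaml
apiVersion: v1alpha1
kind: ExtensionServiceConfig
name: hello-world
environment:
  - GREETING=hello
configFiles:
  - content: |
      greeting = "hello"
    mountPath: /usr/local/etc/hello-world/config.toml
```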

### `restart`

The field `restart` defines the service restart policy; it allows configuring either an always-running service or a one-shot service:

* `always`: always restart the service
* `never`: start the service only once and never restart it
* `untilSuccess`: restart the service while it fails; stop restarting after a successful run

### `logToConsole`

The field `logToConsole` defines whether the service logs should also be written to the console, i.e., to the kernel log buffer (or to the container logs in container mode).

This feature is particularly useful for debugging extensions that operate in maintenance mode or early in the boot process, when service logs cannot be accessed yet.

## Example

Example layout of the Talos root filesystem contents for the extension service:

```text
/
└── usr
    └── local
        ├── etc
        │   └── containers
        │       └── hello-world.yaml
        └── lib
            └── containers
                └── hello-world
                    ├── hello
                    └── config.ini
```

Talos discovers the extension service configuration in `/usr/local/etc/containers/hello-world.yaml`:

```yaml
name: hello-world
container:
  entrypoint: ./hello
  args:
    - --config
    - config.ini
depends:
  - network:
      - addresses
restart: always
```

Talos starts the container for the extension service with the container root filesystem at `/usr/local/lib/containers/hello-world`:

```text
/
├── hello
└── config.ini
```

The extension service is registered as `ext-hello-world` in `talosctl services`:

```shell
$ talosctl service ext-hello-world
NODE     172.20.0.5
ID       ext-hello-world
STATE    Running
HEALTH   ?
EVENTS   [Running]: Started task ext-hello-world (PID 1100) for container ext-hello-world (2m47s ago)
         [Preparing]: Creating service runner (2m47s ago)
         [Preparing]: Running pre state (2m47s ago)
         [Waiting]: Waiting for service "containerd" to be "up" (2m48s ago)
         [Waiting]: Waiting for service "containerd" to be "up", network (2m49s ago)
```

An extension service can be started, restarted and stopped using `talosctl service ext-hello-world start|restart|stop`.
Use `talosctl logs ext-hello-world` to get the logs of the service.

A complete example of an extension service can be found in the [extensions repository](https://github.com/talos-systems/extensions/tree/main/examples/hello-world-service).

538 website/content/v1.12/advanced/install-kubevirt.md Normal file
@ -0,0 +1,538 @@

---
title: "Install KubeVirt on Talos"
description: "This is a guide on how to get started with KubeVirt on Talos"
---

KubeVirt allows you to run virtual machines on Kubernetes.
It runs with QEMU and KVM to provide a seamless virtual machine experience and can be mixed with containerized workloads.
This guide explains how to install KubeVirt on Talos.

## Prerequisites

For KubeVirt to work on Talos, you have to enable certain settings in the BIOS and configure Talos properly.

### Enable virtualization in your BIOS

On many new PCs and servers, virtualization is enabled by default.
Please consult your manufacturer on how to enable this in the BIOS.
You can also run KubeVirt from within a virtual machine.
For that to work, you have to enable nested virtualization.
This can also be done in the BIOS.

### Configure your network interface in bridge mode (optional)

When you want to leverage [Multus]({{< relref "../kubernetes-guides/network/multus" >}}) to give your virtual machines direct access to your node network, your bridge needs to be configured properly.
This can be done by setting your network interface in bridge mode.
You can look up the network interface name by using the following command:

```bash
$ talosctl get links -n 10.99.101.9
NODE          NAMESPACE   TYPE         ID       VERSION   TYPE    KIND     HW ADDR             OPER STATE   LINK STATE
10.99.101.9   network     LinkStatus   bond0    1         ether   bond     52:62:01:53:5b:a7   down         false
10.99.101.9   network     LinkStatus   br0      3         ether   bridge   bc:24:11:a1:98:fc   up           true
10.99.101.9   network     LinkStatus   cni0     9         ether   bridge   1e:5e:99:8f:1e:19   up           true
10.99.101.9   network     LinkStatus   dummy0   1         ether   dummy    62:1c:3e:d5:72:11   down         false
10.99.101.9   network     LinkStatus   eth0     5         ether            bc:24:11:a1:98:fc
```

In this case, this network interface is called `eth0`.
Now you can configure your bridge properly.
This can be done in the machine config of your node:

```yaml
machine:
  network:
    interfaces:
      - interface: br0
        addresses:
          - 10.99.101.9/24
        bridge:
          stp:
            enabled: true
          interfaces:
            - eth0 # This must be changed to your matching interface name
        routes:
          - network: 0.0.0.0/0 # The route's network (destination).
            gateway: 10.99.101.254 # The route's gateway (if empty, creates link scope route).
            metric: 1024 # The optional metric for the route.
```
|
||||
|
||||
### Install the `local-path-provisioner`
|
||||
|
||||
When we are using KubeVirt, we are also installing the CDI (containerized data importer) operator.
|
||||
For this to work properly, we have to install the `local-path-provisioner`.
|
||||
This CNI can be used to write scratch space when importing images with the CDI.
|
||||
|
||||
You can install the `local-path-provisioner` by following [this guide]({{< relref "../kubernetes-guides/configuration/local-storage" >}}).
|
||||
|
||||
### Configure storage
|
||||
|
||||
If you would like to use features such as `LiveMigration` shared storage is neccesary.
|
||||
You can either choose to install a CSI that connects to NFS or you can install Longhorn, for example.
|
||||
For more information on how to install Longhorn on Talos you can follow [this](https://longhorn.io/docs/1.7.2/advanced-resources/os-distro-specific/talos-linux-support/) link.
|
||||
|
||||
To install the NFS-CSI driver, you can follow [This](https://github.com/kubernetes-csi/csi-driver-nfs/blob/master/docs/install-csi-driver-v4.9.0.md) guide.
|
||||
|
||||
After the installation of the NFS-CSI driver is done, you can create a storage class for the NFS CSI driver to work:
|
||||
|
||||
```yaml
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: nfs-csi
|
||||
annotations:
|
||||
storageclass.kubernetes.io/is-default-class: "true"
|
||||
provisioner: nfs.csi.k8s.io
|
||||
parameters:
|
||||
server: 10.99.102.253
|
||||
share: /mnt/data/nfs/kubernetes_csi
|
||||
reclaimPolicy: Delete
|
||||
volumeBindingMode: Immediate
|
||||
mountOptions:
|
||||
- nfsvers=3
|
||||
- nolock
|
||||
```
|
||||
|
||||
Note that this is just an example.
|
||||
Make sure to set the `nolock` option.
|
||||
If not, the nfs-csi storageclass won't work, because talos doesn't have a `rpc.statd` daemon running.
|
||||
|
||||
### Install `virtctl`
|
||||
|
||||
`virtctl` is needed for communication between the CLI and the KubeVirt api server.
|
||||
|
||||
You can install the `virtctl` client directly by running:
|
||||
|
||||
```bash
|
||||
export VERSION=$(curl https://storage.googleapis.com/kubevirt-prow/release/kubevirt/kubevirt/stable.txt)
|
||||
wget https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/virtctl-${VERSION}-linux-amd64
|
||||
```
|
||||
|
||||
Or you can use [krew](https://github.com/kubernetes-sigs/krew/#installation) to integrate it nicely in `kubectl`:
|
||||
|
||||
```bash
|
||||
kubectl krew install virt
|
||||
```
|
||||
|
||||
## Installing KubeVirt

After the necessary preparations are done, you can now install KubeVirt.
This can either be done through the [Operator Lifecycle Manager](https://olm.operatorframework.io/docs/getting-started/) or by simply applying a YAML file.
We will keep this simple and do the following:

```bash
# Point at latest release
export RELEASE=$(curl https://storage.googleapis.com/kubevirt-prow/release/kubevirt/kubevirt/stable.txt)
# Deploy the KubeVirt operator
kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/${RELEASE}/kubevirt-operator.yaml
```

After the operator is installed, it is time to apply the Custom Resource (CR) for the operator to fully deploy KubeVirt.

```yaml
---
apiVersion: kubevirt.io/v1
kind: KubeVirt
metadata:
  name: kubevirt
  namespace: kubevirt
spec:
  configuration:
    developerConfiguration:
      featureGates:
        - LiveMigration
        - NetworkBindingPlugins
    smbios:
      sku: "TalosCloud"
      version: "v0.1.0"
      manufacturer: "Talos Virtualization"
      product: "talosvm"
      family: "ccio"
  workloadUpdateStrategy:
    workloadUpdateMethods:
      - LiveMigrate # enable if you have deployed either Longhorn or NFS-CSI for shared storage.
```

### KubeVirt configuration options

In this YAML file we specified certain configuration options:

#### `featureGates`

KubeVirt has a set of features that are not mature enough to be enabled by default.
As such, they are protected by a Kubernetes concept called feature gates.
More information about the feature gates can be found in the [KubeVirt](https://kubevirt.io/user-guide/cluster_admin/activating_feature_gates/) documentation.

In this example we enable:

- `LiveMigration` -- for live migration of virtual machines to other nodes
- `NetworkBindingPlugins` -- this is needed for Multus to work

#### `smbios`

Here we configure a specific SMBIOS configuration.
This can be useful when you want to give your virtual machines their own SKU, manufacturer name, and so on.

#### `workloadUpdateStrategy`

If this is configured, virtual machines will be live migrated to other nodes when KubeVirt is updated.

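Once shared storage is in place, a live migration can also be triggered manually for a single virtual machine; a sketch using the krew plugin installed earlier (assuming a VM named `fedora-vm`, which we create later in this guide):

```bash
kubectl virt migrate fedora-vm
```
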
## Installing CDI

The CDI (containerized data importer) is needed to import virtual disk images into your KubeVirt cluster.
The CDI can do the following:

- Import images of type:
  - qcow2
  - raw
  - iso
- Import disks from either:
  - http/https
  - uploads through `virtctl`
  - a container registry
  - another PVC

You can either import these images by creating a `DataVolume` CR or by integrating this in your `VirtualMachine` CR, as shown in the sketch below.

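For illustration, a standalone `DataVolume` importing a qcow2 image over HTTP could look like this; the name, URL, size, and storage class here are assumptions to adapt to your environment:

```yaml
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
  name: fedora-dv # hypothetical name
spec:
  source:
    http:
      url: "https://example.com/images/disk.qcow2" # assumed image location
  storage:
    accessModes:
      - ReadWriteMany
    resources:
      requests:
        storage: 35Gi
    storageClassName: nfs-csi
```
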
When applying either the `DataVolume` CR or the `VirtualMachine` CR with a `dataVolumeTemplates`, the CDI kicks in and will do the following:

- create a PVC with the requirements from either the `DataVolume` or the `dataVolumeTemplates`
- start a pod
- write temporary scratch space to local disk
- download the image
- extract it to the temporary scratch space
- copy the image to the PVC

Installing the CDI is very simple:

```bash
# Point to latest release
export TAG=$(curl -s -w %{redirect_url} \
    https://github.com/kubevirt/containerized-data-importer/releases/latest)

export VERSION=$(echo ${TAG##*/})

# install operator
kubectl create -f \
    https://github.com/kubevirt/containerized-data-importer/releases/download/$VERSION/cdi-operator.yaml
```

After that, you can apply a CDI CR for the CDI operator to fully deploy CDI:

```yaml
apiVersion: cdi.kubevirt.io/v1beta1
kind: CDI
metadata:
  name: cdi
spec:
  config:
    scratchSpaceStorageClass: local-path
    podResourceRequirements:
      requests:
        cpu: "100m"
        memory: "60M"
      limits:
        cpu: "750m"
        memory: "2Gi"
```

This CR has some special settings that are needed for CDI to work properly:

### `scratchSpaceStorageClass`

This is the storage class that we installed earlier with the `local-path-provisioner`.
It is needed for the CDI to write scratch space to local disk before importing the image.

### `podResourceRequirements`

In many cases the default resource requests and limits are not sufficient for the importer pod to import the image.
This will result in a crash of the importer pod.

After applying this YAML file, the CDI operator is ready.

## Creating your first virtual machine

Now it is time to create your first virtual machine in KubeVirt.
Below we will describe two examples:

- A virtual machine with the default CNI
- A virtual machine with Multus

### Basic virtual machine example with default CNI

```yaml
---
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
  name: fedora-vm
spec:
  running: false
  template:
    metadata:
      labels:
        kubevirt.io/vm: fedora-vm
      annotations:
        kubevirt.io/allow-pod-bridge-network-live-migration: "true"

    spec:
      evictionStrategy: LiveMigrate
      domain:
        cpu:
          cores: 2
        resources:
          requests:
            memory: 4G
        devices:
          disks:
            - name: fedora-vm-pvc
              disk:
                bus: virtio
            - name: cloudinitdisk
              disk:
                bus: virtio
          interfaces:
            - name: podnet
              masquerade: {}
      networks:
        - name: podnet
          pod: {}
      volumes:
        - name: fedora-vm-pvc
          persistentVolumeClaim:
            claimName: fedora-vm-pvc
        - name: cloudinitdisk
          cloudInitNoCloud:
            networkData: |
              network:
                version: 1
                config:
                  - type: physical
                    name: eth0
                    subnets:
                      - type: dhcp
            userData: |-
              #cloud-config
              users:
                - name: cloud-user
                  ssh_authorized_keys:
                    - ssh-rsa ....
                  sudo: ['ALL=(ALL) NOPASSWD:ALL']
                  groups: sudo
                  shell: /bin/bash
              runcmd:
                - "sudo touch /root/installed"
                - "sudo dnf update"
                - "sudo dnf install httpd fastfetch -y"
                - "sudo systemctl daemon-reload"
                - "sudo systemctl enable httpd"
                - "sudo systemctl start --no-block httpd"

  dataVolumeTemplates:
    - metadata:
        name: fedora-vm-pvc
      spec:
        storage:
          resources:
            requests:
              storage: 35Gi
          accessModes:
            - ReadWriteMany
          storageClassName: "nfs-csi"
        source:
          http:
            url: "https://fedora.mirror.wearetriple.com/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2"
```

In this example we install a basic Fedora 40 virtual machine and a web server.

After applying this YAML, the CDI will import the image and create a `DataVolume`.
You can monitor this process by running:

```bash
kubectl get dv -w
```

After the `DataVolume` is created, you can start the virtual machine:

```bash
kubectl virt start fedora-vm
```

By starting the virtual machine, KubeVirt will create an instance of that `VirtualMachine` called a `VirtualMachineInstance`:

```bash
kubectl get virtualmachineinstance
NAME        AGE   PHASE     IP            NODENAME   READY
fedora-vm   13s   Running   10.244.4.92   kube1      True
```

You can view the console of the virtual machine by running:

```bash
kubectl virt console fedora-vm
```

or by running:

```bash
kubectl virt vnc fedora-vm
```

The `console` command opens a terminal to the virtual machine.
The `vnc` command opens `vncviewer`.
Note that a `vncviewer` needs to be installed for it to work.

Now you can create a `Service` object to expose the virtual machine to the outside.
In this example we will use [MetalLB](https://metallb.universe.tf/) as a LoadBalancer.

```yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    kubevirt.io/vm: fedora-vm
  name: fedora-vm
spec:
  ipFamilyPolicy: PreferDualStack
  externalTrafficPolicy: Local
  ports:
    - name: ssh
      port: 22
      protocol: TCP
      targetPort: 22
    - name: httpd
      port: 80
      protocol: TCP
      targetPort: 80
  selector:
    kubevirt.io/vm: fedora-vm
  type: LoadBalancer
```

```bash
$ kubectl get svc
NAME        TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                     AGE
fedora-vm   LoadBalancer   10.96.14.253   10.99.50.1    22:31149/TCP,80:31445/TCP   2s
```

And we can reach the server with either SSH or HTTP:

```bash
$ nc -zv 10.99.50.1 22
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Connected to 10.99.50.1:22.
Ncat: 0 bytes sent, 0 bytes received in 0.01 seconds.

$ nc -zv 10.99.50.1 80
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Connected to 10.99.50.1:80.
Ncat: 0 bytes sent, 0 bytes received in 0.01 seconds.
```

### Basic virtual machine example with Multus

```yaml
---
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
  name: fedora-vm
spec:
  running: false
  template:
    metadata:
      labels:
        kubevirt.io/vm: fedora-vm
      annotations:
        kubevirt.io/allow-pod-bridge-network-live-migration: "true"

    spec:
      evictionStrategy: LiveMigrate
      domain:
        cpu:
          cores: 2
        resources:
          requests:
            memory: 4G
        devices:
          disks:
            - name: fedora-vm-pvc
              disk:
                bus: virtio
            - name: cloudinitdisk
              disk:
                bus: virtio
          interfaces:
            - name: external
              bridge: {} # We use the bridge interface.
      networks:
        - name: external
          multus:
            networkName: namespace/networkattachmentdefinition # This is the NetworkAttachmentDefinition. See multus docs for more info.
      volumes:
        - name: fedora-vm-pvc
          persistentVolumeClaim:
            claimName: fedora-vm-pvc
        - name: cloudinitdisk
          cloudInitNoCloud:
            networkData: |
              network:
                version: 1
                config:
                  - type: physical
                    name: eth0
                    subnets:
                      - type: dhcp
            userData: |-
              #cloud-config
              users:
                - name: cloud-user
                  ssh_authorized_keys:
                    - ssh-rsa ....
                  sudo: ['ALL=(ALL) NOPASSWD:ALL']
                  groups: sudo
                  shell: /bin/bash
              runcmd:
                - "sudo touch /root/installed"
                - "sudo dnf update"
                - "sudo dnf install httpd fastfetch -y"
                - "sudo systemctl daemon-reload"
                - "sudo systemctl enable httpd"
                - "sudo systemctl start --no-block httpd"

  dataVolumeTemplates:
    - metadata:
        name: fedora-vm-pvc
      spec:
        storage:
          resources:
            requests:
              storage: 35Gi
          accessModes:
            - ReadWriteMany
          storageClassName: "nfs-csi"
        source:
          http:
            url: "https://fedora.mirror.wearetriple.com/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2"
```

In this example we will create a virtual machine that is bound to the bridge interface with the help of [Multus]({{< relref "../kubernetes-guides/network/multus" >}}).
You can start the machine with `kubectl virt start fedora-vm`.
After that, you can look up the IP address of the virtual machine with:

```bash
kubectl get vmi -owide

NAME        AGE    PHASE     IP             NODENAME   READY   LIVE-MIGRATABLE   PAUSED
fedora-vm   6d9h   Running   10.99.101.53   kube1      True    True
```

## Other forms of management

There is a project called [KubeVirt-Manager](https://kubevirt-manager.io/) for managing virtual machines with KubeVirt through a nice web interface.
You can also choose to deploy virtual machines with ArgoCD or Flux.

## Documentation

KubeVirt has extensive documentation where you can check out everything on running virtual machines with KubeVirt.
The documentation can be found [here](https://kubevirt.io/user-guide/).

314
website/content/v1.12/advanced/kernel-module.md
Normal file
@ -0,0 +1,314 @@

---
title: "Adding a Kernel Module"
description: "Create a system extension that includes kernel modules."
---

[System extensions]({{< relref "../talos-guides/configuration/system-extensions" >}}) in Talos provide ways to add files to the root filesystem and make it possible to run privileged containers as services.
But Talos still requires that kernel modules be signed with a trusted signing key to be loaded at run time.

To add a kernel module to Talos you will need to create a system extension that is built with the kernel and signed by the same signing key.
System extensions without kernel modules work without this requirement, but there are a couple of extra steps needed when adding kernel modules.

## Create a package

Talos is built from the [pkgs](https://github.com/siderolabs/pkgs/) repo, and the first step will be to add your custom package to that repo to be built with Talos.
You can look at other packages in that repository for examples of what should be included.

The only file required to create a package is a `pkg.yaml` file in a folder.
Let's create an example package to walk through each step.

Clone the repo and create a folder.

```bash
git clone https://github.com/siderolabs/pkgs.git
cd pkgs
mkdir my-module
```

Now add the package to the `.kres.yaml` file in the root of the repository.
We use this file for templating and generating Makefiles.
Put your out-of-tree kernel module below the comment for dependent packages.

```yaml
spec:
  targets:
    ...
    # - kernel & dependent packages (out of tree kernel modules)
    # kernel first, then packages in alphabetical order
    ...
    - my-module-pkg
    ...
```

Run the following command to generate a new Makefile.

```bash
make rekres
```

Now you have a `make` target to build your module and a directory to store your module configuration.
The next step is to create a `pkg.yaml` file to tell [`bldr`](https://github.com/siderolabs/bldr) how to create a container with the files you need.
The `bldr` tool has assumptions about directory structure and steps you can read about in the GitHub repo.

This example does not build a kernel module, but it can be used as a basis for your own packages.
Please also see existing `pkg.yaml` files in [the pkgs repo](https://github.com/siderolabs/pkgs).

```yaml
name: my-module-pkg # name of your package
variant: scratch # base container for environment (e.g. alpine, scratch)
shell: /bin/sh # shell to use to execute commands in steps
dependencies: # other steps required before building package
  - stage: base
steps: # steps needed to build package container
  - sources: # download source files
      - url: https://example.com/source.tar.gz
        destination: my-module.tar.gz
        sha256: 1234abcd...
        sha512: abcd1234...
    prepare: # create directories and untar
      - tar -xzf my-module.tar.gz --strip-components=1
    build: # compiling software
      - make -j $(nproc)
    install: # move compiled software to correct directory
      - make DESTDIR=/rootfs install
    test: # validate software
      - fhs-validator /rootfs
finalize: # copy directory structure from source to destination
  - from: /rootfs
    to: /
```

## Build the package and kernel

After you've created a `pkg.yaml` file, you can test building your package with the make target you generated earlier.
Because Talos requires kernel modules to be signed with a signing key only available during the Talos kernel build process, we need to build the kernel and package at the same time.

We also need a container registry available to store the built assets.
Follow the steps in [developing Talos]({{< relref "../advanced/developing-talos#prepare" >}}) to create a Docker builder and run a local container registry before running this command.

```bash
make kernel my-module-pkg REGISTRY=127.0.0.1:5005 \
    PLATFORM=linux/amd64 \
    PUSH=true
```

If this is successful, it should output two pieces of information we need to collect for the next steps.
We need to save the kernel and package images.
The output will look something like this:

```bash
=> => pushing manifest for 127.0.0.1:5005/user/kernel:v1.11.0-alpha.0...
...
=> => pushing manifest for 127.0.0.1:5005/user/my-module-pkg:v1.11.0-alpha.0...
```

For easier reference in this guide I will save these images as `$KERNEL_IMAGE` and `$PKG_IMAGE` variables.

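For example, based on the sample output above (the registry path and tags will differ in your build):

```bash
export KERNEL_IMAGE=127.0.0.1:5005/user/kernel:v1.11.0-alpha.0
export PKG_IMAGE=127.0.0.1:5005/user/my-module-pkg:v1.11.0-alpha.0
```
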
## Create an extension

System extensions are the way to add software and files to a Talos Linux root filesystem.
Just like packages, they are built as containers and then layered with Talos to create a bootable squashfs image.

The only unique thing about building a system extension with a kernel module is that we need to build it against the kernel we just built in the previous step.
If we don't do this, our kernel module won't be signed and cannot be loaded at runtime.

The process is very similar to creating a package.
Start by cloning the extensions repo:

```bash
git clone https://github.com/siderolabs/extensions
cd extensions
mkdir my-module
```

Add your extension to the `.kres.yaml` file.

```yaml
---
kind: pkgfile.Build
spec:
  targets:
    ...
    - my-module
    ...
```

Then generate a new Makefile with the additional target.

```bash
make rekres
```

Now create the `manifest.yaml` file for the metadata of your extension in the `my-module` folder.

```yaml
version: v1alpha1 # version of manifest.yaml
metadata:
  name: my-module
  version: 0.1
  author: me
  description: |
    An extension that adds a kernel module
  compatibility:
    talos:
      version: ">= v1.10.0" # what version of Talos is supported
```

Create a `pkg.yaml` file in the `my-module` folder, which works similarly to the `pkg.yaml` file for our package, but this time starts from the package image we built in the first step.
The local directory is mounted into the container at `/pkg`, so we can copy files from that directory.

```yaml
name: my-module
variant: scratch
shell: /bin/sh
dependencies:
  - stage: base
  - image: "${PKG_IMAGE}" # the image we built in the first step
steps:
  - install:
      - mkdir -p /rootfs/usr/lib/modules
      - cp -R /usr/lib/modules/* /rootfs/usr/lib/modules/
finalize:
  - from: /rootfs
    to: /rootfs
  - from: /pkg/manifest.yaml # make sure you add the metadata file
    to: /
```

Lastly, create a `vars.yaml` file in the `my-module` folder to store a version variable.
This isn't strictly required, but it is a convention that lets the automated build work.

```bash
echo 'VERSION: "0.1"' > vars.yaml
```

## Build extension

You now have a complete extension config and can build it with the kernel from your previous pkg build.

```bash
make my-module REGISTRY=127.0.0.1:5005 \
    PLATFORM=linux/amd64 \
    PUSH=true
```

This will create a system extension image and push it to your local registry.
Copy the image that gets pushed and save it as `${EXTENSION_IMAGE}`.

```bash
export EXTENSION_IMAGE='127.0.0.1:5005/jgarr/my-module:0.1@sha256:e8f3352...'
```

## Test the extension

Now we need to create installation media to boot Talos.
We will build and use [imager]({{< relref "../talos-guides/install/boot-assets#imager" >}}) to include our extension.

Clone the Talos repo.

```bash
git clone https://github.com/siderolabs/talos
cd talos
```

Build the installer, and remember to use the kernel image from the first step.

```bash
make installer-base imager PLATFORM=linux/amd64 \
    INSTALLER_ARCH=amd64 \
    REGISTRY=127.0.0.1:5005 \
    PKG_KERNEL=${KERNEL_IMAGE} \
    PUSH=true
```

This will create two images, `installer-base` and `imager`, and push them to your local registry.
Export the installer-base image reference and save it as `$BASE_INSTALLER_IMAGE`.

Create an installer image from your extension and the installer-base you just created with the following command.

```bash
make image-installer \
    REGISTRY=127.0.0.1:5005 \
    IMAGER_ARGS="--base-installer-image=${BASE_INSTALLER_IMAGE} \
    --system-extension-image=${EXTENSION_IMAGE}"
```

We'll have a new container image tar file in the `_out/` folder of our repository.
Load and push the container image to a registry with [crane](https://github.com/google/go-containerregistry/blob/main/cmd/crane/doc/crane.md).
Make sure you replace `$REGISTRY`, `$USER`, and `$TAG` with the values you want.

```bash
crane push _out/installer-amd64.tar $REGISTRY/$USER/installer:$TAG
```

And if you don't have `crane`:

```bash
docker load -i _out/installer-amd64.tar
# note down the sha256 or the image tag output from the above command

docker tag $SHA256_OR_IMAGE_TAG $REGISTRY/$USER/installer:$TAG
docker push $REGISTRY/$USER/installer:$TAG
```

## Test the installer with a fresh install

Now you can boot a machine from generic Talos installation media.
This is only used to get access to the API so we can apply a configuration that will use our installer image.
We'll assume this machine has an IP address of 192.168.100.100.

Generate a configuration that uses your installer image.

```bash
talosctl gen config --install-image $REGISTRY/$USER/installer:$TAG \
    test https://192.168.100.100:6443 # cluster name and endpoint
```

Now create a configuration patch that loads your kernel module by name.
This should be the name of the `.ko` file you built in the package and put in the `/modules` directory.

```yaml
# my-module.yaml
machine:
  kernel:
    modules:
      - name: my-module
```

Apply the machine config and patch to your test machine.

```bash
talosctl apply-config -f controlplane.yaml -i -p '@my-module.yaml' -n 192.168.100.100
```

The machine will reboot as Talos is installed.
When the machine boots, you should see in the dmesg logs that the module was loaded.

```bash
192.168.100.100: kern: warning: my-module: loading out-of-tree module taints kernel.
192.168.100.100: kern: info: Loading my-module driver module v0.1
```

## Test the installer with an existing machine

If you already have Talos running on a machine, you can apply the installer during an upgrade to have the extension installed.

```bash
talosctl upgrade -i $REGISTRY/$USER/installer:$TAG
```

Make sure you still create a patch to load the kernel module and apply it to the machine.

```yaml
# my-module.yaml
machine:
  kernel:
    modules:
      - name: my-module
```

Apply the machine config and patch to your test machine.

```bash
talosctl apply-config -f controlplane.yaml -p '@my-module.yaml'
```

83
website/content/v1.12/advanced/machine-config-oauth.md
Normal file
@ -0,0 +1,83 @@

---
title: "Machine Configuration OAuth2 Authentication"
description: "How to authenticate Talos machine configuration download (`talos.config=`) on `metal` platform using OAuth."
---

Talos Linux, when running on the `metal` platform, can be configured to authenticate the machine configuration download using the OAuth2 device flow.
The machine configuration is fetched from the URL specified with the `talos.config` kernel argument, and by default this HTTP request is not authenticated.
When OAuth2 authentication is enabled, Talos will authenticate the request using the OAuth device flow first, and then pass the token to the machine configuration download endpoint.

## Prerequisites

Obtain the following information:

* OAuth client ID (mandatory)
* OAuth client secret (optional)
* OAuth device endpoint
* OAuth token endpoint
* OAuth scopes, audience (optional)
* extra Talos variables to send to the device auth endpoint (optional)

## Configuration

Set the following kernel parameters on the initial Talos boot to enable the OAuth flow:

* `talos.config` set to the URL of the machine configuration endpoint (which will be authenticated using OAuth)
* `talos.config.oauth.client_id` set to the OAuth client ID (required)
* `talos.config.oauth.client_secret` set to the OAuth client secret (optional)
* `talos.config.oauth.scope` set to the OAuth scopes (optional, repeat the parameter for multiple scopes)
* `talos.config.oauth.audience` set to the OAuth audience (optional)
* `talos.config.oauth.device_auth_url` set to the OAuth device endpoint (if not set, defaults to the `talos.config` URL with the path `/device/code`)
* `talos.config.oauth.token_url` set to the OAuth token endpoint (if not set, defaults to the `talos.config` URL with the path `/token`)
* `talos.config.oauth.extra_variable` set to the extra Talos variables to send to the device auth endpoint (optional, repeat the parameter for multiple variables)

The list of variables supported by the `talos.config.oauth.extra_variable` parameter is the same as the [list of variables]({{< relref "../reference/kernel#talosconfig" >}}) supported by the `talos.config` parameter.

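Putting it together, a boot with OAuth enabled might use a kernel command line like the following sketch (the config URL and client ID are placeholders; the device/token endpoints here match the sample log output below):

```text
talos.config=https://example.com/config.yaml talos.config.oauth.client_id=<CLIENT_ID> talos.config.oauth.device_auth_url=https://oauth2.googleapis.com/device/code talos.config.oauth.token_url=https://oauth2.googleapis.com/token talos.config.oauth.extra_variable=uuid talos.config.oauth.extra_variable=mac
```
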
## Flow

On the initial Talos boot, when machine configuration is not available, Talos will print the following messages:

```text
[talos] downloading config {"component": "controller-runtime", "controller": "config.AcquireController", "platform": "metal"}
[talos] waiting for network to be ready
[talos] [OAuth] starting the authentication device flow with the following settings:
[talos] [OAuth] - client ID: "<REDACTED>"
[talos] [OAuth] - device auth URL: "https://oauth2.googleapis.com/device/code"
[talos] [OAuth] - token URL: "https://oauth2.googleapis.com/token"
[talos] [OAuth] - extra variables: ["uuid" "mac"]
[talos] waiting for variables: [uuid mac]
[talos] waiting for variables: [mac]
[talos] [OAuth] please visit the URL https://www.google.com/device and enter the code <REDACTED>
[talos] [OAuth] waiting for the device to be authorized (expires at 14:46:55)...
```

If the OAuth service provides the complete verification URL, the QR code to scan is also printed to the console:

```text
[talos] [OAuth] or scan the following QR code:
█████████████████████████████████
█████████████████████████████████
████ ▄▄▄▄▄ ██▄▀▀ ▀█ ▄▄▄▄▄ ████
████ █ █ █▄ ▀▄██▄██ █ █ ████
████ █▄▄▄█ ██▀▄██▄ ▀█ █▄▄▄█ ████
████▄▄▄▄▄▄▄█ ▀ █ ▀ █▄█▄▄▄▄▄▄▄████
████ ▀ ▄▄ ▄█ ██▄█ ███▄█▀████
████▀█▄ ▄▄▀▄▄█▀█▄██ ▄▀▄██▄ ▄████
████▄██▀█▄▄▄███▀ ▀█▄▄ ██ █▄ ████
████▄▀▄▄▄ ▄███ ▄ ▀ ▀▀▄▀▄▀█▄ ▄████
████▄█████▄█ █ ██ ▀ ▄▄▄ █▀▀████
████ ▄▄▄▄▄ █ █ ▀█▄█▄ █▄█ █▄ ████
████ █ █ █▄ ▄▀ ▀█▀▄▄▄ ▀█▄████
████ █▄▄▄█ █ ██▄ ▀ ▀███ ▀█▀▄████
████▄▄▄▄▄▄▄█▄▄█▄██▄▄▄▄█▄███▄▄████
█████████████████████████████████
```

Once the authentication flow is complete on the OAuth provider side, Talos will print the following message:

```text
[talos] [OAuth] device authorized
[talos] fetching machine config from: "http://example.com/config.yaml"
[talos] machine config loaded successfully {"component": "controller-runtime", "controller": "config.AcquireController", "sources": ["metal"]}
```

420
website/content/v1.12/advanced/metal-network-configuration.md
Normal file
@ -0,0 +1,420 @@

---
title: "Metal Network Configuration"
description: "How to use `META`-based network configuration on Talos `metal` platform."
---

> Note: This is an advanced feature which requires deep understanding of Talos and Linux network configuration.

Talos Linux, when running on a cloud platform (e.g. AWS or Azure), uses the platform-provided metadata server to provide initial network configuration to the node.
When running on bare metal, there is no metadata server, so there are several options to provide initial network configuration (before machine configuration is acquired):

- use automatic network configuration via DHCP (Talos default)
- use initial boot [kernel command line parameters]({{< relref "../reference/kernel" >}}) to configure networking
- use automatic network configuration via DHCP just enough to fetch machine configuration, and then use machine configuration to set the desired advanced configuration

If the DHCP option is available, it is by far the easiest way to configure networking.
The initial boot kernel command line parameters are not very flexible, and they are not persisted after the initial Talos installation.

Starting with version 1.4.0, Talos offers a new option to configure networking on bare metal: `META`-based network configuration.

> Note: `META`-based network configuration is only available on the Talos Linux `metal` platform.

The Talos [dashboard]({{< relref "../talos-guides/interactive-dashboard" >}}) provides a way to configure `META`-based network configuration for a machine using the console, but it doesn't support all kinds of network configuration.

## Network Configuration Format

Talos `META`-based network configuration is a YAML file with the following format:

```yaml
addresses:
  - address: 147.75.61.43/31
    linkName: bond0
    family: inet4
    scope: global
    flags: permanent
    layer: platform
  - address: 2604:1380:45f2:6c00::1/127
    linkName: bond0
    family: inet6
    scope: global
    flags: permanent
    layer: platform
  - address: 10.68.182.1/31
    linkName: bond0
    family: inet4
    scope: global
    flags: permanent
    layer: platform
links:
  - name: eth0
    up: true
    masterName: bond0
    slaveIndex: 0
    layer: platform
  - name: eth1
    up: true
    masterName: bond0
    slaveIndex: 1
    layer: platform
  - name: bond0
    logical: true
    up: true
    mtu: 0
    kind: bond
    type: ether
    bondMaster:
      mode: 802.3ad
      xmitHashPolicy: layer3+4
      lacpRate: slow
      arpValidate: none
      arpAllTargets: any
      primaryReselect: always
      failOverMac: 0
      miimon: 100
      updelay: 200
      downdelay: 200
      resendIgmp: 1
      lpInterval: 1
      packetsPerSlave: 1
      numPeerNotif: 1
      tlbLogicalLb: 1
      adActorSysPrio: 65535
    layer: platform
routes:
  - family: inet4
    gateway: 147.75.61.42
    outLinkName: bond0
    table: main
    priority: 1024
    scope: global
    type: unicast
    protocol: static
    layer: platform
  - family: inet6
    gateway: '2604:1380:45f2:6c00::'
    outLinkName: bond0
    table: main
    priority: 2048
    scope: global
    type: unicast
    protocol: static
    layer: platform
  - family: inet4
    dst: 10.0.0.0/8
    gateway: 10.68.182.0
    outLinkName: bond0
    table: main
    scope: global
    type: unicast
    protocol: static
    layer: platform
hostnames:
  - hostname: ci-blue-worker-amd64-2
    layer: platform
resolvers: []
timeServers: []
```

Every section is optional, so you can configure only the parts you need.
The format of each section matches the respective network [`*Spec` resource]({{< relref "../learn-more/networking-resources" >}}) `.spec` part, e.g. the `addresses:` section matches the `.spec` of the `AddressSpec` resource:

```yaml
# talosctl get addressspecs bond0/10.68.182.1/31 -o yaml | yq .spec
address: 10.68.182.1/31
linkName: bond0
family: inet4
scope: global
flags: permanent
layer: platform
```

So one way to prepare the network configuration file is to boot Talos Linux, apply the necessary network configuration using Talos machine configuration, and grab the resulting resources from the running Talos instance.

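For example, to dump the currently applied specs as a starting point (a sketch; see the networking resources documentation for the full list of resource types):

```bash
talosctl get addressspecs -o yaml
talosctl get linkspecs -o yaml
talosctl get routespecs -o yaml
```
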
In this guide we will briefly cover the most common examples of the network configuration.

### Addresses

The addresses configured are usually routable IP addresses assigned to the machine, so the `scope:` should be set to `global` and `flags:` to `permanent`.
Additionally, `family:` should be set to either `inet4` or `inet6` depending on the address family.

The `linkName:` property should match the name of the link the address is assigned to; it might be a physical link, e.g. `en9sp0`, or the name of a logical link, e.g. `bond0`, created in the `links:` section.

Example, IPv4 address:

```yaml
addresses:
  - address: 147.75.61.43/31
    linkName: bond0
    family: inet4
    scope: global
    flags: permanent
    layer: platform
```

Example, IPv6 address:

```yaml
addresses:
  - address: 2604:1380:45f2:6c00::1/127
    linkName: bond0
    family: inet6
    scope: global
    flags: permanent
    layer: platform
```

### Links

For physical network interfaces (links), the most usual configuration is to bring the link up:

```yaml
links:
  - name: en9sp0
    up: true
    layer: platform
```

This will bring the link up, and it will also disable Talos auto-configuration (it disables running DHCP on the link).

Another common case is to set a custom MTU:

```yaml
links:
  - name: en9sp0
    up: true
    mtu: 9000
    layer: platform
```

The order of the links in the `links:` section is not important.

#### Bonds

For bonded links, there should be a link resource for the bond itself, and a link resource for each enslaved link:

```yaml
links:
  - name: bond0
    logical: true
    up: true
    kind: bond
    type: ether
    bondMaster:
      mode: 802.3ad
      xmitHashPolicy: layer3+4
      lacpRate: slow
      arpValidate: none
      arpAllTargets: any
      primaryReselect: always
      failOverMac: 0
      miimon: 100
      updelay: 200
      downdelay: 200
      resendIgmp: 1
      lpInterval: 1
      packetsPerSlave: 1
      numPeerNotif: 1
      tlbLogicalLb: 1
      adActorSysPrio: 65535
    layer: platform
  - name: eth0
    up: true
    masterName: bond0
    slaveIndex: 0
    layer: platform
  - name: eth1
    up: true
    masterName: bond0
    slaveIndex: 1
    layer: platform
```

The name of the bond can be anything supported by the Linux kernel, but the following properties are important:

- `logical: true` - this is a logical link, not a physical one
- `kind: bond` - this is a bonded link
- `type: ether` - this is an Ethernet link
- `bondMaster:` - defines the bond configuration; please see the Linux documentation on the available options

For each enslaved link, the following properties are important:

- `masterName: bond0` - the name of the bond this link is enslaved to
- `slaveIndex: 0` - the index of the enslaved link, starting from 0; controls the order of bond slaves

#### VLANs

VLANs are logical links which have a parent link, and a VLAN ID and protocol:

```yaml
links:
  - name: bond0.35
    logical: true
    up: true
    kind: vlan
    type: ether
    parentName: bond0
    vlan:
      vlanID: 35
      vlanProtocol: 802.1ad
```

The name of the VLAN link can be anything supported by the Linux kernel, but the following properties are important:

- `logical: true` - this is a logical link, not a physical one
- `kind: vlan` - this is a VLAN link
- `type: ether` - this is an Ethernet link
- `parentName: bond0` - the name of the parent link
- `vlan:` - defines the VLAN configuration: `vlanID` and `vlanProtocol`

### Routes

For route configuration, most of the time `table: main`, `scope: global`, `type: unicast` and `protocol: static` are used.

The most important route fields are:

- `dst:` defines the destination network; if left empty, it means "default gateway"
- `gateway:` defines the gateway address
- `priority:` defines the route priority (metric); lower values are preferred for the same `dst:` network
- `outLinkName:` defines the name of the link the route is associated with
- `src:` sets the source address for the route (optional)

Additionally, `family:` should be set to either `inet4` or `inet6` depending on the address family.

Example, IPv6 default gateway:

```yaml
routes:
  - family: inet6
    gateway: '2604:1380:45f2:6c00::'
    outLinkName: bond0
    table: main
    priority: 2048
    scope: global
    type: unicast
    protocol: static
    layer: platform
```

Example, IPv4 route to `10/8` via the `10.68.182.0` gateway:

```yaml
routes:
  - family: inet4
    dst: 10.0.0.0/8
    gateway: 10.68.182.0
    outLinkName: bond0
    table: main
    scope: global
    type: unicast
    protocol: static
    layer: platform
```

### Hostnames

Even though the section supports multiple hostnames, only a single one should be used:

```yaml
hostnames:
  - hostname: host
    domainname: some.org
    layer: platform
```

The `domainname:` is optional.

If the hostname is not set, Talos will use a default generated hostname.

### Resolvers

The `resolvers:` section is used to configure DNS resolvers; only a single entry should be used:

```yaml
resolvers:
  - dnsServers:
      - 8.8.8.8
      - 1.1.1.1
    layer: platform
```

If `dnsServers:` is not set, Talos will use the default DNS servers.

### Time Servers

The `timeServers:` section is used to configure NTP time servers; only a single entry should be used:

```yaml
timeServers:
  - timeServers:
      - 169.254.169.254
    layer: platform
```

If `timeServers:` is not set, Talos will use the default NTP servers.

## Supplying `META` Network Configuration

Once the network configuration YAML document is ready, it can be supplied to Talos in one of the following ways:

- for a running Talos machine, using the Talos API (requires already established network connectivity)
- for Talos disk images, it can be embedded into the image
- for ISO/PXE boot methods, it can be supplied via kernel command line parameters as an environment variable

The metal network configuration is stored in the Talos `META` partition under the key `0xa` (decimal 10).

In this guide we will assume that the prepared network configuration is stored in the file `network.yaml`.

> Note: as JSON is a subset of YAML, the network configuration can also be supplied as a JSON document.

### Supplying Network Configuration to a Running Talos Machine

Use `talosctl` to write a network configuration to a running Talos machine:

```bash
talosctl meta write 0xa "$(cat network.yaml)"
```

### Supplying Network Configuration to a Talos Disk Image

Following the [boot assets]({{< relref "../talos-guides/install/boot-assets" >}}) guide, create a disk image passing the network configuration as a `--meta` flag:

```bash
docker run --rm -t -v $PWD/_out:/out -v /dev:/dev --privileged ghcr.io/siderolabs/imager:{{< release >}} metal --meta "0xa=$(cat network.yaml)"
```

### Supplying Network Configuration to a Talos ISO/PXE Boot

As there is no `META` partition created yet before Talos Linux is installed, `META` values can be set as an environment variable `INSTALLER_META_BASE64` passed to the initial boot of Talos.
The supplied value will be used immediately, and it will also be written to the `META` partition once Talos is installed.

When using `imager` to create the ISO, the `INSTALLER_META_BASE64` environment variable will be automatically generated from the `--meta` flag:

```bash
$ docker run --rm -t -v $PWD/_out:/out ghcr.io/siderolabs/imager:{{< release >}} iso --meta "0xa=$(cat network.yaml)"
...
kernel command line: ... talos.environment=INSTALLER_META_BASE64=MHhhPWZvbw==
```

When PXE booting, the value of `INSTALLER_META_BASE64` should be set manually:

```bash
echo -n "0xa=$(cat network.yaml)" | gzip -9 | base64
```

The resulting base64 string should be passed as an environment variable `INSTALLER_META_BASE64` to the initial boot of Talos: `talos.environment=INSTALLER_META_BASE64=<base64-encoded value>`.

### Getting Current `META` Network Configuration

Talos exports `META` keys as resources:

```yaml
# talosctl get meta 0x0a -o yaml
...
spec:
  value: '{"addresses": ...}'
```

150
website/content/v1.12/advanced/migrating-from-kubeadm.md
Normal file
@ -0,0 +1,150 @@

---
title: "Migrating from Kubeadm"
description: "Migrating Kubeadm-based clusters to Talos."
aliases:
  - ../guides/migrating-from-kubeadm
---

It is possible to migrate a cluster that was created using [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/) to Talos.

The high-level steps are the following:

1. Collect CA certificates and a bootstrap token from a control plane node.
2. Create a Talos machine config with the CA certificates you collected.
3. Update the control plane endpoint in the machine config to point to the existing control plane (i.e. your load balancer address).
4. Boot a new Talos machine and apply the machine config.
5. Verify that the new control plane node is ready.
6. Remove one of the old control plane nodes.
7. Repeat the same steps for all control plane nodes.
8. Verify that all control plane nodes are ready.
9. Repeat the same steps for all worker nodes, using the machine config generated for the workers.

## Remarks on kube-apiserver load balancer

While migrating to Talos, you need to make sure that your kube-apiserver load balancer is in place and keeps pointing to the correct set of control plane nodes.

This process depends on your load balancer setup.

If you are using an LB that is external to the control plane nodes (e.g. cloud provider LB, F5 BIG-IP, etc.), you need to make sure that you update the backend IPs of the load balancer to point to the control plane nodes as you add Talos nodes and remove kubeadm-based ones.

If your load balancing is done on the control plane nodes (e.g. keepalived + haproxy on the control plane nodes), you can do the following:

1. Add Talos nodes and remove kubeadm-based ones while updating the haproxy backends to point to the newly added nodes, except the last kubeadm-based control plane node.
2. Turn off keepalived to drop the virtual IP used by the kubeadm-based nodes (introduces kube-apiserver downtime).
3. Set up a new virtual-IP based load balancer on the new set of Talos control plane nodes.
   Use the previous LB IP as the LB virtual IP.
4. Verify apiserver connectivity over the Talos-managed virtual IP.
5. Migrate the last control plane node.

## Prerequisites

- Admin access to the kubeadm-based cluster
- Access to the `/etc/kubernetes/pki` directory (e.g. SSH & root permissions) on the control plane nodes of the kubeadm-based cluster
- Access to the kube-apiserver load-balancer configuration

## Step-by-step guide

1. Download the `/etc/kubernetes/pki` directory from a control plane node of the kubeadm-based cluster.

2. Create a new join token for the new control plane nodes:

    ```bash
    # inside a control plane node
    kubeadm token create --ttl 0
    ```

3. Create Talos secrets from the PKI directory you downloaded in step 1 and the token you generated in step 2:

    ```bash
    talosctl gen secrets --kubernetes-bootstrap-token <TOKEN> --from-kubernetes-pki <PKI_DIR>
    ```

4. Create a new Talos config from the secrets:

    ```bash
    talosctl gen config --with-secrets secrets.yaml <CLUSTER_NAME> https://<EXISTING_CLUSTER_LB_IP>
    ```

5. Collect the information about the kubeadm-based cluster from the kubeadm configmap:

    ```bash
    kubectl get configmap -n kube-system kubeadm-config -oyaml
    ```

    Take note of the following information in the `ClusterConfiguration`:

    - `.controlPlaneEndpoint`
    - `.networking.dnsDomain`
    - `.networking.podSubnet`
    - `.networking.serviceSubnet`

6. Replace the following information in the generated `controlplane.yaml`:

    - `.cluster.network.cni.name` with `none`
    - `.cluster.network.podSubnets[0]` with the value of the `networking.podSubnet` from the previous step
    - `.cluster.network.serviceSubnets[0]` with the value of the `networking.serviceSubnet` from the previous step
    - `.cluster.network.dnsDomain` with the value of the `networking.dnsDomain` from the previous step

7. Go through the rest of `controlplane.yaml` and `worker.yaml` to customize them according to your needs, especially:

    - `.cluster.secretboxEncryptionSecret` should either be removed if you don't currently use `EncryptionConfig` on your `kube-apiserver` or set to the correct value

8. Make sure that, on your current kubeadm cluster, the first `--service-account-issuer=` parameter in `/etc/kubernetes/manifests/kube-apiserver.yaml` is equal to the value of `.cluster.controlPlane.endpoint` in `controlplane.yaml`.
    If it's not, add a new `--service-account-issuer=` parameter with the correct value before your current one in `/etc/kubernetes/manifests/kube-apiserver.yaml` on all of your control plane nodes, and restart the kube-apiserver containers.

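    A quick way to inspect the currently configured issuer on a control plane node (a sketch):

    ```bash
    grep -- '--service-account-issuer' /etc/kubernetes/manifests/kube-apiserver.yaml
    ```
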
9. Bring up a Talos node to be the initial Talos control plane node.

10. Apply the generated `controlplane.yaml` to the Talos control plane node:

    ```bash
    talosctl --nodes <TALOS_NODE_IP> apply-config --insecure --file controlplane.yaml
    ```

11. Wait until the new control plane node joins the cluster and is ready.

    ```bash
    kubectl get node -owide --watch
    ```

12. Update your load balancer to point to the new control plane node.

13. Drain the old control plane node you are replacing:

    ```bash
    kubectl drain <OLD_NODE> --delete-emptydir-data --force --ignore-daemonsets --timeout=10m
    ```

14. Remove the old control plane node from the cluster:

    ```bash
    kubectl delete node <OLD_NODE>
    ```

15. Destroy the old node:

    ```bash
    # inside the node
    sudo kubeadm reset --force
    ```

16. Repeat the same steps, starting from step 7, for all control plane nodes.

17. Repeat the same steps, starting from step 7, for all worker nodes, applying the `worker.yaml` instead and skipping the LB step:

    ```bash
    talosctl --nodes <TALOS_NODE_IP> apply-config --insecure --file worker.yaml
    ```

18. Your kubeadm `kube-proxy` configuration may not be compatible with the one generated by Talos, which will make Talos Kubernetes upgrades impossible (labels may not be the same, and `selector.matchLabels` is an immutable field).
    To be sure, export your current kube-proxy daemonset manifest and check the labels; they have to be:

    ```yaml
    tier: node
    k8s-app: kube-proxy
    ```

    If they are not, modify all the label fields, save the file, delete your current kube-proxy daemonset, and apply the one you modified.

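    One way to inspect the current selector labels (a sketch):

    ```bash
    kubectl -n kube-system get daemonset kube-proxy -o jsonpath='{.spec.selector.matchLabels}'
    ```
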
49
website/content/v1.12/advanced/oci-base-spec.md
Normal file
@ -0,0 +1,49 @@

---
title: "OCI Base Runtime Specification"
description: "Adjusting OCI base runtime specification for CRI containers."
---

Every container initiated by the Container Runtime Interface (CRI) adheres to the [OCI runtime specification](https://github.com/opencontainers/runtime-spec/blob/main/spec.md).
While certain aspects of this specification can be modified through Kubernetes pod and container configurations, others remain fixed.

Talos Linux provides the capability to adjust the OCI base runtime specification for all containers managed by the CRI.
However, it is important to note that the Kubernetes/CRI plugin may still override some settings, meaning changes to the base runtime specification are not always guaranteed to take effect.

## Getting Current OCI Base Runtime Specification

To get the current OCI base runtime specification, you can use the following command (`yq -P .` is used to pretty-print the output):

```bash
$ talosctl read /etc/cri/conf.d/base-spec.json | yq -P .
ociVersion: 1.2.0
process:
  user:
    uid: 0
    gid: 0
  cwd: /
  capabilities:
    bounding:
      - CAP_CHOWN
...
```

The output might depend on the specific Talos (`containerd`) version.

## Adjusting OCI Base Runtime Specification

To adjust the OCI base runtime specification, the following machine configuration patch can be used:

```yaml
machine:
  baseRuntimeSpecOverrides:
    process:
      rlimits:
        - type: RLIMIT_NOFILE
          hard: 1024
          soft: 1024
```

In this example, the number of open files is adjusted to be 1024 for all containers (the OCI default is unset, so it inherits the Talos default of 1048576 open files).
The contents of the `baseRuntimeSpecOverrides` field are merged with the current base runtime specification, so only the fields that need to be adjusted should be included.

This configuration change will be applied with a machine reboot, and the new OCI base runtime specification will only affect containers created on the node after the change.

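After the node reboots, you can confirm the merge took effect by re-reading the base spec (a sketch):

```bash
talosctl read /etc/cri/conf.d/base-spec.json | yq -P .process.rlimits
```
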
113
website/content/v1.12/advanced/overlays.md
Normal file
@ -0,0 +1,113 @@

---
title: "Overlays"
description: "Overlays"
---

Overlays provide a way to customize the Talos Linux boot image.
Overlays hook into the Talos install steps and can be used to provide additional boot assets (in the case of single board computers),
extra kernel arguments, or custom configuration that is not part of the default Talos installation and is specific to a particular overlay.

## Overlays vs Extensions

Overlays are similar to extensions, but they are used to customize the installation process, while extensions are used to customize the root filesystem.

## Official Overlays

The list of official overlays can be found in the [Overlays GitHub repository](https://github.com/siderolabs/overlays/).

## Using Overlays

Overlays can be used to generate a modified metal image or installer image with the overlay applied.

The process of generating boot assets with overlays is described in the [boot assets guide]({{< relref "../talos-guides/install/boot-assets" >}}).

### Example: Booting a Raspberry Pi 4 with an Overlay

Follow the board specific guide for [Raspberry Pi]({{< relref "../talos-guides/install/single-board-computers/rpi_generic" >}}) to download or generate the metal disk image and write it to an SD card.

Boot the machine with the boot media and apply the machine configuration with the installer image that has the overlay applied.

```yaml
# Talos machine configuration patch
machine:
  install:
    image: factory.talos.dev/installer/fc1cceeb5711cd263877b6b808fbf4942a8deda65e8804c114a0b5bae252dc50:{{< release >}}
```

> Note: The schematic id shown in the above patch is for a vanilla `rpi_generic` overlay.
> Replace it with the schematic id of the overlay you want to apply.

## Authoring Overlays

An overlay is a container image with a [specific folder structure](https://github.com/siderolabs/overlays#readme).
Overlays can be built and managed using any tool that produces container images, e.g. `docker build`.

Sidero Labs maintains a [repository of overlays](https://github.com/siderolabs/overlays).

### Developing An Overlay

Let's assume that you would like to contribute an overlay for a specific board, e.g. by contributing to the [`sbc-rockchip` repository](https://github.com/siderolabs/sbc-rockchip).
Clone the repository and inspect the existing overlays to understand the structure.

Usually an overlay consists of a few key components:

- `firmware`: contains the firmware files required for the board
- `bootloader`: contains the bootloader, e.g. `u-boot`, for the board
- `dtb`: contains the device tree blobs for the board
- `installer`: contains the installer that will be used to install this overlay on the node
- `profile`: contains information about the disk image profile, e.g. the disk image size, bootloader used, output format, etc.

1. For the new overlay, create any needed folders and `pkg.yaml` files.
2. If your board introduces a new chipset that is not supported yet, make sure to add the firmware build for it.
3. Add the necessary `u-boot` and `dtb` build steps to the `pkg.yaml` files.
4. Proceed to add an installer, which is a small Go binary that will be used to install the overlay on the node.
   Here you need to add the Go `src/` as well as the `pkg.yaml` file.
5. Lastly, add the profile information in the `profiles` folder.

You are now ready to attempt building the overlay.
It's recommended to push the build to a container registry to test the overlay with the Talos installer.

The default settings are:

- `REGISTRY` is set to `ghcr.io`
- `USERNAME` is set to `siderolabs` (or the value of the `USERNAME` environment variable, if set)

```bash
make sbc-rockchip PUSH=true
```

If using a custom registry, the `REGISTRY` and `USERNAME` variables can be set:

```bash
make sbc-rockchip PUSH=true REGISTRY=<registry> USERNAME=<username>
```

After building the overlay, take note of the pushed image tag, e.g. `664638a`, because you will need it for the next step.
You can now build a flashable image using the command below.

```bash
export TALOS_VERSION=v1.7.6
export USERNAME=octocat
export BOARD=nanopi-r5s
export TAG=664638a

docker run --rm -t -v ./_out:/out -v /dev:/dev --privileged ghcr.io/siderolabs/imager:${TALOS_VERSION} \
  "${BOARD}" --arch arm64 \
  --base-installer-image="ghcr.io/siderolabs/installer-base:${TALOS_VERSION}" \
  --overlay-name="${BOARD}" \
  --overlay-image="ghcr.io/${USERNAME}/sbc-rockchip:${TAG}"
```

> **--overlay-option**

`--overlay-option` can be used to pass additional options to the overlay installer if they are implemented by the overlay.
An example can be seen in the [sbc-raspberrypi](https://github.com/siderolabs/sbc-raspberrypi/) overlay repository.
It supports passing multiple options by repeating the flag, or options can be read from a YAML document by passing `--overlay-option=@<path to file>`.
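
For illustration, options might be passed to the imager like this (the option name `option1` is hypothetical; which options exist depends entirely on the overlay):

```bash
# Repeat the flag for multiple options, or read them from a YAML document with @<file>
docker run --rm -t -v ./_out:/out ghcr.io/siderolabs/imager:${TALOS_VERSION} \
  rpi_generic --arch arm64 \
  --overlay-name="rpi_generic" \
  --overlay-image="ghcr.io/siderolabs/sbc-raspberrypi:${TAG}" \
  --overlay-option="option1=value1" \
  --overlay-option=@overlay-options.yaml
```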

> **Note:** In some cases you need to build a custom imager.
> In this case, refer to [this guide on how to build a custom image]({{< relref "./building-images" >}}) using an imager.

#### Troubleshooting

> **IMPORTANT:** If this does not succeed, have a look at the documentation of the external dependencies you are pulling in and make sure that the `pkg.yaml` files are correctly configured.
> In some cases it may be required to update the dependencies to an appropriate version via the `Pkgfile`.
89
website/content/v1.12/advanced/sbom.md
Normal file
89
website/content/v1.12/advanced/sbom.md
Normal file
@ -0,0 +1,89 @@
---
title: "SBOMs"
description: "A guide on using Software Bill of Materials for Talos Linux."
---

A Software Bill of Materials (SBOM) is a formal record containing the details and supply chain relationships of the various components used in building a software product.
SBOMs are used to provide transparency and traceability of software components, which is essential for security, compliance, and efficient management of software supply chains.

Talos Linux provides SBOMs for core operating system components, including the Linux kernel, built-in components like `containerd`, and other software packages used to build Talos Linux.
When a system extension is installed, it can also provide its own SBOM, which will be included in the overall SBOM for the Talos Linux system.

## Acquiring SBOMs

SBOMs for Talos Linux are provided in SPDX format, which is a standard format for representing SBOMs.
You can acquire SBOMs for Talos Linux in the following ways:

* Download the SBOM for a specific Talos Linux release from the [GitHub release](https://github.com/siderolabs/talos/releases/tag/{{< release >}}) page:
  * `talos-amd64.spdx.json` for the amd64 architecture.
  * `talos-arm64.spdx.json` for the arm64 architecture.
* Acquire the SBOM from a running Talos Linux system using the `talosctl` command:
  * core Talos Linux SBOM in the `/usr/share/spdx` directory.
  * extension SBOMs in the `/usr/local/share/spdx` directory.
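
For example, the SPDX documents can be listed and read directly from a running node (the file name below is illustrative):

```bash
# List the SBOM documents shipped with the OS, then read one of them
talosctl -n 172.20.0.2 ls /usr/share/spdx
talosctl -n 172.20.0.2 read /usr/share/spdx/talos.spdx.json
```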

## SBOMs as Resources

Talos Linux SBOMs are also available as resources in the Talos Linux system.
You can access the SBOMs using the `talosctl` command:

```bash
talosctl get sboms
NODE         NAMESPACE   TYPE       ID             VERSION   VERSION           LICENSE
172.20.0.2   runtime     SBOMItem   Talos          1         {{< release >}}
172.20.0.2   runtime     SBOMItem   apparmor       1         v3.1.7            GPL-2.0-or-later
172.20.0.2   runtime     SBOMItem   cel.dev/expr   1         v0.24.0
...
```

You can also get the SBOM for a specific component using the `talosctl get sbom` command:

```yaml
# talosctl get sbom kernel -o yaml
node: 172.20.0.2
metadata:
    namespace: runtime
    type: SBOMItems.talos.dev
    id: kernel
    version: 1
    owner: runtime.SBOMItemController
    phase: running
    created: 2025-07-24T14:20:29Z
    updated: 2025-07-24T14:20:29Z
spec:
    name: kernel
    version: 6.12.38
    license: GPL-2.0-only
    cpes:
        - cpe:2.3:o:linux:linux_kernel:6.12.38:*:*:*:*:*:*:*
```

## Scanning SBOMs

You can scan SBOMs for known vulnerabilities using tools like [Grype](https://github.com/anchore/grype).
You will need two source files for scanning:

* The SBOM file in SPDX format.
* The Vulnerability Exploitability eXchange (VEX) database.

The VEX database is used to filter out vulnerabilities that are not applicable to the specific software version or configuration,
which helps to reduce false positives in vulnerability scanning.

In order to generate the VEX database, run the following command:

```bash
docker run --rm --pull always ghcr.io/siderolabs/generate-vex:latest gen --target-version {{< release >}} > vex.json
```

The basic command to scan the SBOM is as follows:

```bash
grype sbom:talos-amd64.spdx.json
```

With the VEX database, the command becomes:

> Note: At the time of writing, scanning with the VEX database fails until this [PR](https://github.com/anchore/grype/pull/2798) is merged.

```bash
grype sbom:talos-amd64.spdx.json --vex vex.json
```
55
website/content/v1.12/advanced/selinux.md
Normal file
55
website/content/v1.12/advanced/selinux.md
Normal file
@ -0,0 +1,55 @@
---
title: "SELinux"
description: "SELinux security module support (experimental)."
---

Talos Linux 1.10 introduces initial SELinux support.
Talos currently contains a basic SELinux policy that is designed to protect the OS from workloads, including privileged pods.
The policy denies access to the machine configuration and prevents debuggers from attaching to system processes; securing the Kubernetes components themselves is out of its scope.

## Configuration

SELinux is enabled by default in Talos 1.10 images.
The default mode is permissive, as some CNI and CSI solutions, as well as extensions, are currently incompatible with it.
For now, enforcing mode has only been tested with the Flannel CNI we ship by default.
These missing parts are being worked on to make SELinux available for more use cases.

### Disabling SELinux

On some occasions, you may want to disable SELinux completely, mostly if even permissive mode is not compatible with some of your workloads or plugins.

SELinux also needs to be disabled if you are enabling AppArmor, because both security modules cannot be enabled at the same time.

If you want to disable SELinux, you can do so by adding `selinux=0` to the kernel cmdline.
This is most commonly done when creating the image via the configuration in the Image Factory.
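
For example, an Image Factory schematic adding this argument could look like the following (a minimal sketch of the schematic customization):

```yaml
# Image Factory schematic: append selinux=0 to the kernel command line
customization:
  extraKernelArgs:
    - selinux=0
```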

### Mode of operation

You can query the SELinux state with:

```shell
$ talosctl --nodes <IP> get SecurityState
NODE         NAMESPACE   TYPE            ID              VERSION   SECUREBOOT   UKISIGNINGKEYFINGERPRINT   PCRSIGNINGKEYFINGERPRINT   SELINUXSTATE
172.20.0.2   runtime     SecurityState   securitystate   1         false                                                              enabled, permissive
```

> Please note that SELinux is still in an experimental state in Talos Linux.
> Extensions currently do not support enforcing mode, which is a known missing feature being worked on.
> Expect some CNI and CSI plugins to not work in enforcing mode.
> Please report the issues you encounter with different configurations to help cover various usage scenarios.
> Enforcing mode should only be enabled on new installs as of version 1.10, since the upgrade path for enabling SELinux is still being worked on.

As of version 1.10, SELinux runs in permissive mode by default, which does not offer any extra protection but allows denials to be logged.
SELinux can be put in enforcing mode (to actually prevent access when it is not authorized by the policy) by adding `enforcing=1` to the kernel cmdline.
This is most commonly done via the configuration in the Image Factory.

## Obtaining and processing denial logs

If SELinux has blocked some event from happening, it will log it to the audit log.
If the mode is permissive, the only implication of that would be a denial message, so permissive mode is useful for prototyping the policy.
You can check the logs with:

`talosctl --nodes <IP> logs auditd > audit.log`
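
Denial records can then be pulled out of the saved log with a plain text search (AVC denial messages contain the word `denied`):

```bash
# Keep only the SELinux denial records from the captured audit log
grep denied audit.log
```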

You can get more insight into the inner workings of the SELinux policy in the corresponding section of the [Developing Talos]({{< relref "./developing-talos/#selinux-policy-debugging-and-development" >}}) page.
102
website/content/v1.12/advanced/static-pods.md
Normal file
102
website/content/v1.12/advanced/static-pods.md
Normal file
@ -0,0 +1,102 @@
---
title: "Static Pods"
description: "Using Talos Linux to set up static pods in Kubernetes."
aliases:
  - ../guides/static-pods
---

## Static Pods

Static pods are run directly by the `kubelet`, bypassing the Kubernetes API server checks and validations.
Most of the time a `DaemonSet` is a better alternative to static pods, but some workloads need to run
before the Kubernetes API server is available or might need to bypass security restrictions imposed by the API server.

See the [Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/) for more information on static pods.

## Configuration

Static pod definitions are specified in the Talos machine configuration:

```yaml
machine:
  pods:
    - apiVersion: v1
      kind: Pod
      metadata:
        name: nginx
      spec:
        containers:
          - name: nginx
            image: nginx
```

Talos renders static pod definitions to the `kubelet` using a local HTTP server; the `kubelet` picks up the definition and launches the pod.

Talos accepts changes to the static pod configuration without a reboot.
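
For example, the definition above can be added to a running node as a machine configuration patch (the file name is illustrative):

```bash
# Apply the static pod definition from a patch file; no reboot is required
talosctl -n 172.20.0.2 patch mc -p @static-pod-patch.yaml
```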

To see a full list of static pods, use `talosctl get staticpods`, and to see the status of the static pods (as reported by the `kubelet`), use `talosctl get staticpodstatus`.

## Usage

The kubelet mirrors static pod definitions to the API server, so static pods can be inspected with `kubectl get pods`, logs can be retrieved with `kubectl logs`, etc.

```bash
$ kubectl get pods
NAME                                 READY   STATUS    RESTARTS   AGE
nginx-talos-default-controlplane-2   1/1     Running   0          17s
```

If the API server is not available, the status of the static pod can also be inspected with `talosctl containers --kubernetes`:

```bash
$ talosctl containers --kubernetes
NODE         NAMESPACE   ID                                                                 IMAGE                            PID    STATUS
172.20.0.3   k8s.io      default/nginx-talos-default-controlplane-2                         registry.k8s.io/pause:3.6        4886   SANDBOX_READY
172.20.0.3   k8s.io      └─ default/nginx-talos-default-controlplane-2:nginx:4183a7d7a771   docker.io/library/nginx:latest
...
```

Logs of static pods can be retrieved with `talosctl logs --kubernetes`:

```bash
$ talosctl logs --kubernetes default/nginx-talos-default-controlplane-2:nginx:4183a7d7a771
172.20.0.3: 2022-02-10T15:26:01.289208227Z stderr F 2022/02/10 15:26:01 [notice] 1#1: using the "epoll" event method
172.20.0.3: 2022-02-10T15:26:01.2892466Z stderr F 2022/02/10 15:26:01 [notice] 1#1: nginx/1.21.6
172.20.0.3: 2022-02-10T15:26:01.28925723Z stderr F 2022/02/10 15:26:01 [notice] 1#1: built by gcc 10.2.1 20210110 (Debian 10.2.1-6)
```

## Troubleshooting

Talos doesn't perform any validation on the static pod definitions.
If the pod isn't running, use the `kubelet` logs (`talosctl logs kubelet`) to find the problem:

```bash
$ talosctl logs kubelet
172.20.0.2: {"ts":1644505520281.427,"caller":"config/file.go:187","msg":"Could not process manifest file","path":"/etc/kubernetes/manifests/talos-default-nginx-gvisor.yaml","err":"invalid pod: [spec.containers: Required value]"}
```

## Resource Definitions

Static pod definitions are available as `StaticPod` resources combined with the Talos-generated control plane static pods:

```bash
$ talosctl get staticpods
NODE         NAMESPACE   TYPE        ID                        VERSION
172.20.0.3   k8s         StaticPod   default-nginx             1
172.20.0.3   k8s         StaticPod   kube-apiserver            1
172.20.0.3   k8s         StaticPod   kube-controller-manager   1
172.20.0.3   k8s         StaticPod   kube-scheduler            1
```

Talos assigns the ID `<namespace>-<name>` to the static pods specified in the machine configuration.

On control plane nodes, the status of the running static pods is available in the `StaticPodStatus` resource:

```bash
$ talosctl get staticpodstatus
NODE         NAMESPACE   TYPE              ID                                                                  VERSION   READY
172.20.0.3   k8s         StaticPodStatus   default/nginx-talos-default-controlplane-2                          2         True
172.20.0.3   k8s         StaticPodStatus   kube-system/kube-apiserver-talos-default-controlplane-2             2         True
172.20.0.3   k8s         StaticPodStatus   kube-system/kube-controller-manager-talos-default-controlplane-2    3         True
172.20.0.3   k8s         StaticPodStatus   kube-system/kube-scheduler-talos-default-controlplane-2             3         True
```
157
website/content/v1.12/advanced/talos-api-access-from-k8s.md
Normal file
157
website/content/v1.12/advanced/talos-api-access-from-k8s.md
Normal file
@ -0,0 +1,157 @@
---
title: "Talos API access from Kubernetes"
description: "How to access the Talos API from within Kubernetes."
aliases:
  - ../guides/talos-api-access-from-k8s
---

In this guide, we will enable the Talos feature to access the Talos API from within Kubernetes.

## Enabling the Feature

Edit the machine configuration to enable the feature, specifying the Kubernetes namespaces from which the Talos API
can be accessed and the allowed Talos API roles.

```bash
talosctl -n 172.20.0.2 edit machineconfig
```

Configure the `kubernetesTalosAPIAccess` feature like the following:

```yaml
spec:
  machine:
    features:
      kubernetesTalosAPIAccess:
        enabled: true
        allowedRoles:
          - os:reader
        allowedKubernetesNamespaces:
          - default
```
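
To double-check that the feature is active, the applied configuration can be read back, for example (a quick sketch; adjust the number of context lines as needed):

```bash
# Grep the rendered machine config for the feature block
talosctl -n 172.20.0.2 get machineconfig -o yaml | grep -A 6 kubernetesTalosAPIAccess
```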

## Injecting Talos ServiceAccount into manifests

Create the following manifest file `deployment.yaml`:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: talos-api-access
spec:
  selector:
    matchLabels:
      app: talos-api-access
  template:
    metadata:
      labels:
        app: talos-api-access
    spec:
      containers:
        - name: talos-api-access
          image: alpine:3
          command:
            - sh
            - -c
            - |
              wget -O /usr/local/bin/talosctl https://github.com/siderolabs/talos/releases/download/{{< release >}}/talosctl-linux-amd64
              chmod +x /usr/local/bin/talosctl
              while true; do talosctl -n 172.20.0.2 version; sleep 1; done
```

**Note:** make sure that you replace the IP `172.20.0.2` with a valid Talos node IP.

Use the `talosctl inject serviceaccount` command to inject the Talos ServiceAccount into the manifest.

```bash
talosctl inject serviceaccount -f deployment.yaml > deployment-injected.yaml
```

Inspect the generated manifest:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  name: talos-api-access
spec:
  selector:
    matchLabels:
      app: talos-api-access
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: talos-api-access
    spec:
      containers:
        - command:
            - sh
            - -c
            - |
              wget -O /usr/local/bin/talosctl https://github.com/siderolabs/talos/releases/download/{{< release >}}/talosctl-linux-amd64
              chmod +x /usr/local/bin/talosctl
              while true; do talosctl -n 172.20.0.2 version; sleep 1; done
          image: alpine:3
          name: talos-api-access
          resources: {}
          volumeMounts:
            - mountPath: /var/run/secrets/talos.dev
              name: talos-secrets
      tolerations:
        - operator: Exists
      volumes:
        - name: talos-secrets
          secret:
            secretName: talos-api-access-talos-secrets
status: {}
---
apiVersion: talos.dev/v1alpha1
kind: ServiceAccount
metadata:
  name: talos-api-access-talos-secrets
spec:
  roles:
    - os:reader
---
```

As you can see, your deployment manifest is now injected with the Talos ServiceAccount.

## Testing API Access

Apply the new manifest to the `default` namespace:

```bash
kubectl apply -n default -f deployment-injected.yaml
```

Follow the logs of the pods belonging to the deployment:

```bash
kubectl logs -n default -f -l app=talos-api-access
```

You'll see a repeating output similar to the following:

```text
Client:
    Tag:         <talos version>
    SHA:         ....
    Built:
    Go version:  go1.18.4
    OS/Arch:     linux/amd64
Server:
    NODE:        172.20.0.2
    Tag:         <talos version>
    SHA:         ...
    Built:
    Go version:  go1.18.4
    OS/Arch:     linux/amd64
    Enabled:     RBAC
```

This means that the pod can talk to the Talos API of node 172.20.0.2 successfully.
37
website/content/v1.12/advanced/verifying-images.md
Normal file
37
website/content/v1.12/advanced/verifying-images.md
Normal file
@ -0,0 +1,37 @@
---
title: "Verifying Images"
description: "Verifying Talos container image signatures."
---

Sidero Labs signs the container images generated for the Talos release with [cosign](https://docs.sigstore.dev/cosign/overview/):

* `ghcr.io/siderolabs/installer` (Talos installer)
* `ghcr.io/siderolabs/talos` (Talos image for container runtime)
* `ghcr.io/siderolabs/talosctl` (`talosctl` client packaged as a container image)
* `ghcr.io/siderolabs/imager` (Talos install image generator)
* all [system extension images](https://github.com/siderolabs/extensions/)

## Verifying Container Image Signatures

The `cosign` tool can be used to verify the signatures of the Talos container images:

```bash
$ cosign verify --certificate-identity-regexp '@siderolabs\.com$' --certificate-oidc-issuer https://accounts.google.com ghcr.io/siderolabs/installer:v1.4.0

Verification for ghcr.io/siderolabs/installer:v1.4.0 --
The following checks were performed on each of these signatures:
  - The cosign claims were validated
  - Existence of the claims in the transparency log was verified offline
  - The code-signing certificate was verified using trusted certificate authority certificates

[{"critical":{"identity":{"docker-reference":"ghcr.io/siderolabs/installer"},"image":{"docker-manifest-digest":"sha256:f41795cc88f40eb1bc6b3c638c4a3123f6ef3c90627bfc35c04ebab82581e3ee"},"type":"cosign container image signature"},"optional":{"1.3.6.1.4.1.57264.1.1":"https://accounts.google.com","Bundle":{"SignedEntryTimestamp":"MEQCIERkQpgEnPWnfjUHIWO9QxC9Ute3/xJOc7TO5GUnu59xAiBKcFvrDWHoUYChT0/+gaazTrI+r0/GWSbi+Q+sEQ5AKA==","Payload":{"body":"eyJhcGlWZXJzaW9uIjoiMC4wLjEiLCJraW5kIjoiaGFzaGVkcmVrb3JkIiwic3BlYyI6eyJkYXRhIjp7Imhhc2giOnsiYWxnb3JpdGhtIjoic2hhMjU2IiwidmFsdWUiOiJkYjhjYWUyMDZmODE5MDlmZmI4NjE4ZjRkNjIzM2ZlYmM3NzY5MzliOGUxZmZkMTM1ODA4ZmZjNDgwNjYwNGExIn19LCJzaWduYXR1cmUiOnsiY29udGVudCI6Ik1FVUNJUURQWXhiVG5vSDhJTzBEakRGRE9rNU1HUjRjMXpWMys3YWFjczNHZ2J0TG1RSWdHczN4dVByWUgwQTAvM1BSZmZydDRYNS9nOUtzQVdwdG9JbE9wSDF0NllrPSIsInB1YmxpY0tleSI6eyJjb250ZW50IjoiTFMwdExTMUNSVWRKVGlCRFJWSlVTVVpKUTBGVVJTMHRMUzB0Q2sxSlNVTXhha05EUVd4NVowRjNTVUpCWjBsVlNIbEhaRTFQVEhkV09WbFFSbkJYUVRKb01qSjRVM1ZIZVZGM2QwTm5XVWxMYjFwSmVtb3dSVUYzVFhjS1RucEZWazFDVFVkQk1WVkZRMmhOVFdNeWJHNWpNMUoyWTIxVmRWcEhWakpOVWpSM1NFRlpSRlpSVVVSRmVGWjZZVmRrZW1SSE9YbGFVekZ3WW01U2JBcGpiVEZzV2tkc2FHUkhWWGRJYUdOT1RXcE5kMDVFUlRSTlZHZDZUbXBWTlZkb1kwNU5hazEzVGtSRk5FMVVaekJPYWxVMVYycEJRVTFHYTNkRmQxbElDa3R2V2tsNmFqQkRRVkZaU1V0dldrbDZhakJFUVZGalJGRm5RVVZaUVdKaVkwbDZUVzR3ZERBdlVEZHVUa0pNU0VscU1rbHlORTFQZGpoVVRrVjZUemNLUkVadVRXSldVbGc0TVdWdmExQnVZblJHTVZGMmRWQndTVm95VkV3NFFUUkdSMWw0YldFeGJFTk1kMkk0VEZOVWMzRlBRMEZZYzNkblowWXpUVUUwUndwQk1WVmtSSGRGUWk5M1VVVkJkMGxJWjBSQlZFSm5UbFpJVTFWRlJFUkJTMEpuWjNKQ1owVkdRbEZqUkVGNlFXUkNaMDVXU0ZFMFJVWm5VVlZqYWsweUNrbGpVa1lyTkhOVmRuRk5ia3hsU0ZGMVJIRkdRakZqZDBoM1dVUldVakJxUWtKbmQwWnZRVlV6T1ZCd2VqRlphMFZhWWpWeFRtcHdTMFpYYVhocE5Ga0tXa1E0ZDB0M1dVUldVakJTUVZGSUwwSkRSWGRJTkVWa1dWYzFhMk50VmpWTWJrNTBZVmhLZFdJeldrRmpNbXhyV2xoS2RtSkhSbWxqZVRWcVlqSXdkd3BMVVZsTFMzZFpRa0pCUjBSMmVrRkNRVkZSWW1GSVVqQmpTRTAyVEhrNWFGa3lUblprVnpVd1kzazFibUl5T1c1aVIxVjFXVEk1ZEUxRGMwZERhWE5IQ2tGUlVVSm5OemgzUVZGblJVaFJkMkpoU0ZJd1kwaE5Oa3g1T1doWk1rNTJaRmMxTUdONU5XNWlNamx1WWtkVmRWa3lPWFJOU1VkTFFtZHZja0puUlVVS1FXUmFOVUZuVVVOQ1NIZEZaV2RDTkVGSVdVRXpWREIzWVhOaVNFVlVTbXBIVWpSamJWZGpNMEZ4U2t0WWNtcGxVRXN6TDJnMGNIbG5Remh3TjI4MFFRcEJRVWRJYkdGbVp6Um5RVUZDUVUxQlVucENSa0ZwUVdKSE5tcDZiVUkyUkZCV1dUVXlWR1JhUmtzeGVUSkhZVk5wVW14c1IydHlSRlpRVXpsSmJGTktDblJSU1doQlR6WlZkbnBFYVVOYVFXOXZSU3RLZVdwaFpFdG5hV2xLT1RGS00yb3ZZek5CUTA5clJIcFhOamxaVUUxQmIwZERRM0ZIVTAwME9VSkJUVVFLUVRKblFVMUhWVU5OUVZCSlRUVjJVbVpIY0VGVWNqQTJVR1JDTURjeFpFOXlLMHhFSzFWQ04zbExUVWRMWW10a1UxTnJaMUp5U3l0bGNuZHdVREp6ZGdvd1NGRkdiM2h0WlRkM1NYaEJUM2htWkcxTWRIQnpjazFJZGs5cWFFSmFTMVoxVG14WmRXTkJaMVF4V1VWM1ZuZHNjR2QzYTFWUFdrWjRUemRrUnpONkNtVnZOWFJ3YVdoV1kyTndWMlozUFQwS0xTMHRMUzFGVGtRZ1EwVlNWRWxHU1VOQlZFVXRMUzB0TFFvPSJ9fX19","integratedTime":1681843022,"logIndex":18304044,"logID":"c0d23d6ad406973f9559f3ba2d1ca01f84147d8ffc5b8445c224f98b9591801d"}},"Issuer":"https://accounts.google.com","Subject":"andrey.smirnov@siderolabs.com"}}]
```

The image should be signed using the [cosign certificate authority flow](https://docs.sigstore.dev/certificate_authority/certificate-issuing-overview/) by a Sidero Labs employee with an email from the `siderolabs.com` domain.
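
The same check works for any of the images listed above, for example the `talosctl` client image (tag shown for illustration):

```bash
cosign verify --certificate-identity-regexp '@siderolabs\.com$' \
  --certificate-oidc-issuer https://accounts.google.com \
  ghcr.io/siderolabs/talosctl:{{< release >}}
```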

## Reproducible Builds

Talos builds for `kernel`, `initramfs`, `talosctl`, the ISO image, and container images are reproducible.
So you can verify that the build is the same as the one provided on the [GitHub releases page](https://github.com/siderolabs/talos/releases).

See [building Talos images]({{< relref "building-images" >}}) for more details.
69
website/content/v1.12/advanced/watchdog.md
Normal file
69
website/content/v1.12/advanced/watchdog.md
Normal file
@ -0,0 +1,69 @@
---
title: "Watchdog Timers"
description: "Using hardware watchdogs to work around hardware/software lockups."
---

Talos Linux supports the configuration of hardware watchdog timers.
Hardware watchdog timers allow the system to be reset (rebooted) if the software stack becomes unresponsive.
Please consult your hardware/VM documentation for the availability of hardware watchdog timers.
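
For example, when running Talos as a QEMU/KVM virtual machine, a watchdog device can typically be attached to the VM (an illustrative flag; consult your hypervisor's documentation):

```bash
# Expose an emulated Intel i6300ESB watchdog device to the guest
qemu-system-x86_64 ... -device i6300esb
```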

## Configuration

To discover the available watchdog devices, run:

```shell
$ talosctl ls /sys/class/watchdog/
NODE         NAME
172.20.0.2   .
172.20.0.2   watchdog0
172.20.0.2   watchdog1
```

The implementation of the watchdog device can be queried with:

```shell
$ talosctl read /sys/class/watchdog/watchdog0/identity
i6300ESB timer
```

To enable the watchdog timer, patch the machine configuration with the following:

```yaml
# watchdog.yaml
apiVersion: v1alpha1
kind: WatchdogTimerConfig
device: /dev/watchdog0
timeout: 5m
```

```shell
talosctl patch mc -p @watchdog.yaml
```

Talos Linux will set up the watchdog timer with a 5-minute timeout, and it will keep resetting the timer to prevent the system from rebooting.
If the software becomes unresponsive, the watchdog timer will expire, and the system will be reset by the watchdog hardware.

## Inspection

To inspect the watchdog timer configuration, run:

```shell
$ talosctl get watchdogtimerconfig
NODE         NAMESPACE   TYPE                  ID      VERSION   DEVICE           TIMEOUT
172.20.0.2   runtime     WatchdogTimerConfig   timer   1         /dev/watchdog0   5m0s
```

To inspect the watchdog timer status, run:

```shell
$ talosctl get watchdogtimerstatus
NODE         NAMESPACE   TYPE                  ID      VERSION   DEVICE           TIMEOUT
172.20.0.2   runtime     WatchdogTimerStatus   timer   1         /dev/watchdog0   5m0s
```

The current status of the watchdog timer can also be inspected via Linux sysfs:

```shell
$ talosctl read /sys/class/watchdog/watchdog0/state
active
```
4
website/content/v1.12/introduction/_index.md
Normal file
4
website/content/v1.12/introduction/_index.md
Normal file
@ -0,0 +1,4 @@
---
title: "Introduction"
weight: 10
---
231
website/content/v1.12/introduction/getting-started.md
Normal file
231
website/content/v1.12/introduction/getting-started.md
Normal file
@ -0,0 +1,231 @@
---
title: Getting Started
weight: 30
description: "A guide to setting up a Talos cluster"
---

This guide walks you through creating a simple Talos cluster with one control plane node and one or more worker nodes.

If you're looking to set up a cluster with multiple control plane nodes, see [Production Notes]({{< relref "prodnotes" >}}).

**New to Talos?** Start with [Quickstart]({{< relref "quickstart" >}}) to create a local virtual cluster on your workstation.

**Planning for production?** See [Production Notes]({{< relref "prodnotes" >}}) for additional requirements and best practices.

**Installing on cloud or virtualized platforms?** Check out the [platform-specific guides]({{< relref "../talos-guides/install" >}}) for installation methods tailored to different environments.

## Prerequisites

To create a Kubernetes cluster with Talos, you'll need to:

- **Install talosctl**: `talosctl` is the CLI tool used to interact with the Talos API.
  Since Talos Linux does not have SSH access, `talosctl` is the primary tool for managing and configuring your Talos machines.

  You can install `talosctl` on macOS or Linux by running:

  ```bash
  brew install siderolabs/tap/talosctl
  ```

  Refer to the [talosctl installation guide]({{< relref "../talos-guides/install/talosctl" >}}) for installation on other platforms.

- **Ensure network access**: Your machines will need internet access to download the Talos installer and container images, sync time, and more.

  If you're working in a restricted network environment, check out the official documentation on using [registry proxies]({{< relref "../talos-guides/configuration/pull-through-cache" >}}), local registries, or setting up an [air-gapped installation]({{< relref "../advanced/air-gapped" >}}).

## Talos Cluster Setup Overview

Every Talos cluster follows the same process, regardless of where you deploy it:

1. **Boot** - Start machines with the Talos Linux image.
1. **Configure** - Create a root of trust certificate authority and generate configuration files.
1. **Apply** - Apply machine configurations to the nodes.
1. **Connect** - Set up your local `talosctl` client.
1. **Bootstrap** - Initialize the Kubernetes cluster.

**Note**: You can also use [Omni](https://www.siderolabs.com/omni-signup/) to create a Talos cluster that spans multiple platforms, including bare metal, cloud providers, and virtual machines.

Let's walk through each step and create a Talos cluster.

## Step 1: Download The Talos Linux Image

Get the latest ISO for your architecture from our [Image Factory](https://factory.talos.dev/).

## Step 2: Boot Your Machine

Boot your hardware using the ISO you just downloaded.
At this stage, you'll:

- Boot one machine as your control plane node.
- Boot additional machines as worker nodes (this is optional).

You'll see the Talos dashboard once your hardware boots from the ISO image.

**Note:** The ISO runs entirely in RAM and won't modify your disks until you apply a configuration.

**Troubleshooting network connectivity:** If your machine fails to establish a network connection after booting, you may need to add network drivers through system extensions.
Add these extensions to your Talos image via the [Image Factory](https://factory.talos.dev/), or see the [system extensions repository](https://github.com/siderolabs/extensions) for more information.

## Step 3: Store Your Node IP Addresses in a Variable

To create variables for your machines' IP addresses:

1. Copy the IP address displayed on each machine console, including the control plane and any worker nodes you've created.

   If you don't have a display connected, retrieve the IP addresses from your DHCP server.

   

1. Create a variable for your control plane node's IP address by replacing `<your-control-plane-ip>` with the actual IP:

   ```bash
   export CONTROL_PLANE_IP=<your-control-plane-ip>
   ```

1. If you have worker nodes, store their IP addresses in a Bash array.
   Replace each `<worker-ip>` placeholder with the actual IP address of a worker node.
   You can include as many IP addresses as needed:

   ```bash
   WORKER_IP=("<worker-ip-1>" "<worker-ip-2>" "<worker-ip-3>" ...)
   ```
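
   For example, with two worker nodes (addresses are illustrative):

   ```bash
   WORKER_IP=("192.168.0.5" "192.168.0.6")
   ```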

## Step 4: Unmount the ISO

Unplug your installation USB drive or unmount the ISO.
This prevents you from accidentally installing to the USB drive and makes it clearer which disk to select for installation.

## Step 5: Learn About Your Installation Disks

When you first boot your machine from the ISO, Talos runs temporarily in memory.
This means that your Talos nodes, configurations, and cluster membership won't survive reboots or power cycles.

However, once you apply the machine configuration (which you'll do later in this guide), you'll install Talos, its complete operating system, and your configuration to a specified disk for permanent storage.

Run this command to view all the available disks on your control plane:

```bash
talosctl get disks --insecure --nodes $CONTROL_PLANE_IP
```

Note the disk ID (e.g., `sda`, `vda`) as you will use it in the next step.

## Step 6: Generate Cluster Configuration

Talos Linux is configured entirely using declarative configuration files, avoiding the need to deal with SSH and running commands.

To generate these declarative configuration files:

1. Define variables for your cluster name and the disk ID from step 5.
   Replace the placeholders with your actual values:

   ```bash
   export CLUSTER_NAME=<cluster_name>
   export DISK_NAME=<control_plane_disk_name>
   ```

1. Run this command to generate the configuration files:

   ```bash
   talosctl gen config $CLUSTER_NAME https://$CONTROL_PLANE_IP:6443 --install-disk /dev/$DISK_NAME
   ```

   This command generates machine configurations that specify the Kubernetes API endpoint (which is your control plane node's IP) for cluster communication and the target disk for the Talos installation.

You'll get three files from this command:

- **controlplane.yaml**: The configuration for your control plane.
- **worker.yaml**: The configuration for your worker nodes.
- **talosconfig**: Your `talosctl` configuration file, used to connect to and authenticate access to your cluster.

## Step 7: Apply Configurations

Now that you've created your configurations, it's time to apply them to bring your nodes and cluster online:

1. Run this command to apply the control plane configuration:

   ```bash
   talosctl apply-config --insecure --nodes $CONTROL_PLANE_IP --file controlplane.yaml
   ```

1. Next, apply the worker node configuration:

   ```bash
   for ip in "${WORKER_IP[@]}"; do
     echo "Applying config to worker node: $ip"
     talosctl apply-config --insecure --nodes "$ip" --file worker.yaml
   done
   ```

## Step 8: Set Your Endpoints

Set your endpoints with this command:

```bash
talosctl --talosconfig=./talosconfig config endpoints $CONTROL_PLANE_IP
```

## Step 9: Bootstrap Your Etcd Cluster

Wait for your control plane node to finish booting, then bootstrap your etcd cluster by running:

```bash
talosctl bootstrap --nodes $CONTROL_PLANE_IP --talosconfig=./talosconfig
```

**Note:** Run this command ONCE on a SINGLE control plane node.
If you have multiple control plane nodes, you can choose any of them.

## Step 10: Get Kubernetes Access

Download your `kubeconfig` file to start using `kubectl`.

You have two download options: you can either merge your Kubernetes configurations **OR** keep them separate.

Here's how to do both:

- Merge your new cluster into your local Kubernetes configuration:

  ```bash
  talosctl kubeconfig --nodes $CONTROL_PLANE_IP --talosconfig=./talosconfig
  ```

- Specify a filename if you prefer not to merge with your default Kubernetes configuration:

  ```bash
  talosctl kubeconfig alternative-kubeconfig --nodes $CONTROL_PLANE_IP --talosconfig=./talosconfig
  export KUBECONFIG=./alternative-kubeconfig
  ```

## Step 11: Check Cluster Health

Run the following command to check the health of your nodes:

```bash
talosctl --nodes $CONTROL_PLANE_IP --talosconfig=./talosconfig health
```

## Step 12: Verify Node Registration

Confirm that your nodes are registered in Kubernetes:

```bash
kubectl get nodes
```

You should see your control plane and worker nodes listed with a **Ready** status.

## Next Steps

Congratulations!
You now have a working Kubernetes cluster on Talos Linux.

For a list of all the commands and operations that `talosctl` provides, see the [CLI reference]({{< relref "../reference/cli" >}}).

### What's Next?

- [Set up persistent storage]({{< relref "../kubernetes-guides/configuration/storage" >}})
- [Deploy a Metrics Server]({{< relref "../kubernetes-guides/configuration/deploy-metrics-server" >}})
- [Explore the talosctl CLI reference]({{< relref "../reference/cli" >}})
- [Plan your production deployment]({{< relref "prodnotes" >}})
389
website/content/v1.12/introduction/prodnotes.md
Normal file
389
website/content/v1.12/introduction/prodnotes.md
Normal file
@ -0,0 +1,389 @@
---
title: Production Clusters
weight: 30
description: "Recommendations for setting up a Talos Linux cluster in production."
---

This guide explains what to consider when creating a production-quality Talos Linux cluster on bare metal.
Check out the [Reference Architecture documentation](https://www.siderolabs.com/resource-hub/resources/) for architectural diagrams and guidance on creating production-grade clusters in other environments.

This guide assumes that you've already created a development cluster and are familiar with the **Getting Started** documentation.
If not, please refer to the [Getting Started]({{< relref "getting-started" >}}) guide for more information.

When moving from a learning environment to a production-ready Talos Linux cluster, you have to consider several critical factors:

- High availability for your control plane nodes.
- Secure configuration management.
- Reliability for continuous service and minimal downtime.
- Authentication for access control.

Follow the steps below to build a production-grade Talos cluster that is highly available, reliable, and secure.

**Note**: Check out [Omni](https://www.siderolabs.com/omni-signup/) for managing large-scale Talos Linux clusters automatically.

## Step 1: Prepare Your Infrastructure

To create your production cluster infrastructure:

1. Boot your machines using the Talos ISO image.
1. Ensure network access on your nodes.

Here is how to do each step:

### Boot Your Machines Using the Talos ISO Image

Follow these steps to boot your machines using the Talos ISO image:

1. Download the latest ISO for your infrastructure depending on the hardware type from the [Talos Image Factory](https://factory.talos.dev/).

   **Note**: For network booting and self-built media using the published kernel, there are a number of required kernel parameters.
   Please see the [kernel docs]({{< relref "../reference/kernel" >}}) for more information.

1. Boot three machines as control plane nodes using the ISO image you just downloaded.
1. Boot additional machines as worker nodes.

### Ensure Network Access

If your nodes are behind a firewall, in a private network, or otherwise not directly reachable, you will need to configure a load balancer to forward TCP port 50000 to reach the nodes for Talos API access.

**Note**: Because the Talos Linux API uses gRPC and mutual TLS, it cannot be proxied by an HTTP/S proxy, but only by a TCP load balancer.
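
A quick way to confirm that a node (or the load balancer in front of it) is reachable is to probe the Talos API port, for example with `nc` (a simple sketch; any TCP port checker works):

```bash
# Check that TCP port 50000 is reachable on the node or load balancer
nc -zv <node-or-load-balancer-ip> 50000
```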

With your control plane and worker nodes booted, next configure your Kubernetes endpoint.

## Step 2: Store Your IP Addresses in a Variable

To store variables for your machines' IP addresses:

1. Copy the IP address displayed on each machine console, including the control plane and any worker nodes you've created.

   If you don't have a display connected, retrieve the IP addresses from your DHCP server.

   

1. Create a Bash array for your control plane node IP addresses, replacing each `<control-plane-ip>` placeholder with the IP address of a control plane node.
   You can include as many IP addresses as needed:

   ```bash
   CONTROL_PLANE_IP=("<control-plane-ip-1>" "<control-plane-ip-2>" "<control-plane-ip-3>")
   ```

   **For example**:

   If your control plane nodes' IP addresses are `192.168.0.2`, `192.168.0.3`, and `192.168.0.4`, your command would be:

   ```bash
   CONTROL_PLANE_IP=("192.168.0.2" "192.168.0.3" "192.168.0.4")
   ```

1. If you have worker nodes, store their IP addresses in a Bash array.
   Replace each `<worker-ip>` placeholder with the actual IP address of a worker node.
   You can include as many IP addresses as needed:

   ```bash
   WORKER_IP=("<worker-ip-1>" "<worker-ip-2>" "<worker-ip-3>")
   ```

## Step 3: Decide Your Kubernetes Endpoint

You've set up multiple control planes for high availability, but they only provide true high availability if the Kubernetes API server endpoint can reach all control plane nodes.

Here are two common ways to configure this:

- **Dedicated load balancer**: Set up a dedicated load balancer that routes to your control plane nodes.
- **DNS records**: Create multiple DNS records that point to all your control plane nodes.

With these, you can pass in one IP address or DNS name during setup that routes to all your control plane nodes.

Here is how you can configure each option:

### Dedicated Load Balancer

If you're using a cloud provider or have your own load balancer (such as HAProxy, an NGINX reverse proxy, or an F5 load balancer), setting up a dedicated load balancer is a natural choice.

It is also important to note that if you [created the cluster with Omni](https://omni.siderolabs.com/tutorials/getting_started), Omni will automatically act as a load balancer for your Kubernetes endpoint.

Configure a frontend to listen on TCP port 6443 and direct traffic to the addresses of your Talos control plane nodes.

Your Kubernetes endpoint will be the IP address or DNS name of the load balancer's frontend, with the port appended, for example, `https://myK8s.mydomain.io:6443`.

**Note**: You cannot use an HTTP load balancer, because the Kubernetes API server handles TLS termination and mutual TLS authentication.

### DNS Records

Alternatively, you can configure your Kubernetes endpoint using DNS records.
Simply add multiple A or AAAA records, one for each control plane node, to a DNS name.

For example, you can add:

```url
kube.cluster1.mydomain.com  IN  A  192.168.0.10
kube.cluster1.mydomain.com  IN  A  192.168.0.11
kube.cluster1.mydomain.com  IN  A  192.168.0.12
```

Then your endpoint would be:

```url
https://kube.cluster1.mydomain.com:6443
```

## Step 4: Save Your Endpoint in a Variable

Set a variable to store the endpoint you chose in Step 3.
Replace the `<your_endpoint>` placeholder with your actual endpoint:

```bash
export YOUR_ENDPOINT=<your_endpoint>
```

## Step 5: Generate Secrets Bundle

The secrets bundle is a file that contains all the cryptographic keys, certificates, and tokens needed to secure your Talos Linux cluster.

To generate the secrets bundle, run:

```bash
talosctl gen secrets -o secrets.yaml
```

## Step 6: Generate Machine Configurations

Follow these steps to generate the machine configurations:

1. Set a variable for your cluster name by running the following command.
   Replace `<your_cluster_name>` with the name you want to give your cluster:

   ```bash
   export CLUSTER_NAME=<your_cluster_name>
   ```

1. Run this command to generate your machine configuration files using your secrets bundle:

   ```bash
   talosctl gen config --with-secrets secrets.yaml $CLUSTER_NAME https://$YOUR_ENDPOINT:6443
   ```

   This command will generate three files:

   - **controlplane.yaml**: Configuration for your control plane.
   - **worker.yaml**: Configuration for your worker nodes.
   - **talosconfig**: The `talosctl` configuration file used to connect to and authenticate with your cluster.

## Step 7: Unmount the ISO

Unplug your installation USB drive or unmount the ISO from all your control plane and worker nodes.
This prevents you from accidentally installing to the USB drive and makes it clearer which disk to select for installation.

## Step 8: Understand Your Nodes

The default machine configurations for control plane and worker nodes are typically sufficient to get your cluster running.
However, you may need to customize certain settings such as network interfaces and disk configurations depending on your specific environment.

Follow these steps to verify that your machine configurations are set up correctly:

1. **Check network interfaces**: Run this command to view all network interfaces on any node, whether control plane or worker.

   Replace `<node-ip-address>` with the IP of the node you want to inspect.

   **Note**: Copy the ID of the link whose operational state (OPER) is **up**.

   ```bash
   talosctl --nodes <node-ip-address> get links --insecure
   ```

1. **Check Available Disks:** Run this command to check all available disks on any node.
   Replace `<node-ip-address>` with the IP address of the node you want to inspect:

   ```bash
   talosctl get disks --insecure --nodes <node-ip-address>
   ```

1. **Verify Configuration Files:** Open your `worker.yaml` and `controlplane.yaml` configuration files in your preferred editor.
   Check that the values match your worker and control plane nodes' network and disk settings.
   If the values don't match, you'll need to update your machine configuration.

**Note**: Refer to the [Talos CLI reference]({{< relref "../reference/cli" >}}) for additional commands to gather more information about your nodes and cluster.

## Step 9: Patch Your Machine Configuration (Optional)

You can patch your worker and control plane machine configurations to reflect the correct network interfaces and disks of your nodes.

Follow these steps to patch your machine configuration:

1. Create patch files for the configurations you want to modify:

   ```bash
   touch controlplane-patch-1.yaml  # For patching the control plane nodes configuration
   touch worker-patch-1.yaml        # For patching the worker nodes configuration
   ```

   **Note**: You don't have to create both patch files; only create patches for the configurations you actually need to modify.

   You can also create multiple patch files (e.g., `controlplane-patch-2.yaml`, `controlplane-patch-3.yaml`) if you want to make multiple subsequent patches to the same machine configuration.

1. Copy and paste this YAML block of code and add the correct hardware values to each patch file.

   For example, for `controlplane-patch-1.yaml`, use the network interface and disk information you gathered from your control plane nodes:

   ```yaml
   # controlplane-patch-1.yaml file
   machine:
     network:
       interfaces:
         - interface: <control-plane-network-interface> # From control plane node
           dhcp: true
     install:
       disk: /dev/<control-plane-disk-name> # From control plane node
   ```

   For `worker-patch-1.yaml`, use network interface and disk information from your worker nodes:

   ```yaml
   # worker-patch-1.yaml file
   machine:
     network:
       interfaces:
         - interface: <worker-network-interface> # From worker node
           dhcp: true
     install:
       disk: /dev/<worker-disk-name> # From worker node
   ```

1. Apply the different patch files to the different machine configurations:
   - **For control plane**:

     ```bash
     talosctl machineconfig patch controlplane.yaml --patch @controlplane-patch-1.yaml --output controlplane.yaml
     ```

   - **For worker**:

     ```bash
     talosctl machineconfig patch worker.yaml --patch @worker-patch-1.yaml --output worker.yaml
     ```

Additionally, you can learn more about [patches]({{< relref "../talos-guides/configuration/patching/" >}}) from the configuration patches documentation.

## Step 10: Apply the Machine Configuration

To apply your machine configuration:

1. Run this command to apply the `controlplane.yaml` configuration to your control plane nodes:

   ```bash
   for ip in "${CONTROL_PLANE_IP[@]}"; do
     echo "=== Applying configuration to node $ip ==="
     talosctl apply-config --insecure \
       --nodes "$ip" \
       --file controlplane.yaml
     echo "Configuration applied to $ip"
     echo ""
   done
   ```

1. Run this command to apply the `worker.yaml` configuration to your worker nodes:

   ```bash
   for ip in "${WORKER_IP[@]}"; do
     echo "=== Applying configuration to node $ip ==="
     talosctl apply-config --insecure \
       --nodes "$ip" \
       --file worker.yaml
     echo "Configuration applied to $ip"
     echo ""
   done
   ```

## Step 11: Manage Your Talos Configuration File

The `talosconfig` is your key to managing the Talos Linux cluster; without it, you cannot authenticate or communicate with your cluster nodes using `talosctl`.

You have two options for managing your `talosconfig`:

1. Merge your new `talosconfig` into the default configuration file located at `~/.talos/config`:

   ```bash
   talosctl config merge ./talosconfig
   ```

1. Copy the configuration file to your `~/.talos` directory and set the `TALOSCONFIG` environment variable:

   ```bash
   mkdir -p ~/.talos
   cp ./talosconfig ~/.talos/config
   export TALOSCONFIG=~/.talos/config
   ```

## Step 12: Set Endpoints of Your Control Plane Nodes

Configure your endpoints to enable `talosctl` to automatically load balance requests and fail over between control plane nodes when individual nodes become unavailable.

Run this command to configure your endpoints.
Replace the placeholders `<control_plane_IP_1> <control_plane_IP_2> <control_plane_IP_3>` with the IP addresses of your control plane nodes:

```bash
talosctl config endpoint <control_plane_IP_1> <control_plane_IP_2> <control_plane_IP_3>
```

**For example**:

If your control plane nodes' IP addresses are `192.168.0.2`, `192.168.0.3`, and `192.168.0.4`, your command would be:

```bash
talosctl config endpoint 192.168.0.2 192.168.0.3 192.168.0.4
```

## Step 13: Bootstrap Your Kubernetes Cluster

Wait for your control plane nodes to finish booting, then bootstrap your etcd cluster by running the command below.

Replace the `<control-plane-IP>` placeholder with the IP address of ONE of your three control plane nodes:

```bash
talosctl bootstrap --nodes <control-plane-IP>
```

**Note**: Run this command ONCE on a SINGLE control plane node.
If you have multiple control plane nodes, you can choose any of them.

## Step 14: Get Kubernetes Access

Download your `kubeconfig` file to start using `kubectl` with your cluster.
These commands must be run against a single control plane node.

You have two options for managing your `kubeconfig`.
Replace `<control-plane-IP>` with the IP address of any one of your control plane nodes:

- Merge into your default `kubeconfig`:

  ```bash
  talosctl kubeconfig --nodes <control-plane-IP>
  ```

- Create a separate `kubeconfig` file:

  ```bash
  talosctl kubeconfig alternative-kubeconfig --nodes <control-plane-IP>
  export KUBECONFIG=./alternative-kubeconfig
  ```

## Step 15: Verify Your Nodes Are Running

Run this command to ensure that your nodes are running:

```bash
kubectl get nodes
```

## Next Steps

Congratulations!
You now have a working production-grade Talos Linux Kubernetes cluster.

### What's Next?

- [Set up persistent storage]({{< relref "../kubernetes-guides/configuration/storage" >}})
- [Deploy a Metrics Server]({{< relref "../kubernetes-guides/configuration/deploy-metrics-server" >}})
- [Explore the talosctl CLI reference]({{< relref "../reference/cli" >}})
62
website/content/v1.12/introduction/quickstart.md
Normal file
62
website/content/v1.12/introduction/quickstart.md
Normal file
@ -0,0 +1,62 @@
---
title: Quickstart
weight: 20
description: "A short guide on setting up a simple Talos Linux cluster locally with Docker."
---

{{< youtube IO2Yo3N46nk >}}

## Local Docker Cluster

The easiest way to try Talos is by using the CLI (`talosctl`) to create a cluster on a machine with `docker` installed.

### Prerequisites

#### `talosctl`

Download `talosctl` (macOS or Linux):

```bash
brew install siderolabs/tap/talosctl
```

#### `kubectl`

Download `kubectl` via one of the methods outlined in the [documentation](https://kubernetes.io/docs/tasks/tools/install-kubectl/).

### Create the Cluster

Now run the following:

```bash
talosctl cluster create
```

{{% alert title="Note" color="info" %}}
If you are using Docker Desktop on macOS and you encounter the error *Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?*, you may need to manually create the link for the Docker socket:

```bash
sudo ln -s "$HOME/.docker/run/docker.sock" /var/run/docker.sock
```

{{% /alert %}}

You can explore using Talos API commands:

```bash
talosctl dashboard --nodes 10.5.0.2
```

Verify that you can reach Kubernetes:

```bash
$ kubectl get nodes -o wide
NAME                           STATUS   ROLES    AGE    VERSION                INTERNAL-IP   EXTERNAL-IP   OS-IMAGE                  KERNEL-VERSION   CONTAINER-RUNTIME
talos-default-controlplane-1   Ready    master   115s   v{{< k8s_release >}}   10.5.0.2      <none>        Talos ({{< release >}})   <host kernel>    containerd://1.5.5
talos-default-worker-1         Ready    <none>   115s   v{{< k8s_release >}}   10.5.0.3      <none>        Talos ({{< release >}})   <host kernel>    containerd://1.5.5
```

### Destroy the Cluster

When you are all done, remove the cluster:

```bash
talosctl cluster destroy
```
58
website/content/v1.12/introduction/support-matrix.md
Normal file

@ -0,0 +1,58 @@
---
title: Support Matrix
weight: 60
description: "Table of supported Talos Linux versions and respective platforms."
---

| Talos Version | 1.12 | 1.11 |
| --- | --- | --- |
| Release Date | 2025-12-15 (TBD) | 2025-09-01 (1.11.0) |
| End of Community Support | 1.13.0 release (2026-04-30, TBD) | 1.12.0 release (2025-12-15, TBD) |
| Enterprise Support | [offered by Sidero Labs Inc.](https://www.siderolabs.com/support/) | [offered by Sidero Labs Inc.](https://www.siderolabs.com/support/) |
| Kubernetes | 1.35, 1.34, 1.33, 1.32, 1.31, 1.30 | 1.34, 1.33, 1.32, 1.31, 1.30, 1.29 |
| NVIDIA Drivers | 570.x.x (PRODUCTION), 535.x.x (LTS) | 570.x.x (PRODUCTION), 535.x.x (LTS) |
| Architecture | amd64, arm64 | amd64, arm64 |
| **Platforms** | | |
| - cloud | Akamai, AWS, GCP, Azure, CloudStack, Digital Ocean, Exoscale, Hetzner, OpenNebula, OpenStack, Oracle Cloud, Scaleway, Vultr, Upcloud | Akamai, AWS, GCP, Azure, CloudStack, Digital Ocean, Exoscale, Hetzner, OpenNebula, OpenStack, Oracle Cloud, Scaleway, Vultr, Upcloud |
| - bare metal | x86: BIOS, UEFI, SecureBoot; arm64: UEFI, SecureBoot; boot: ISO, PXE, disk image | x86: BIOS, UEFI; arm64: UEFI; boot: ISO, PXE, disk image |
| - virtualized | VMware, Hyper-V, KVM, Proxmox, Xen | VMware, Hyper-V, KVM, Proxmox, Xen |
| - SBCs | Banana Pi M64, Jetson Nano, Libre Computer Board ALL-H3-CC, Nano Pi R4S, Pine64, Pine64 Rock64, Radxa ROCK Pi 4C, Radxa ROCK 4C+, Radxa ROCK 5B, Raspberry Pi 4B, Raspberry Pi Compute Module 4, Turing RK1, Orange Pi 5 | Banana Pi M64, Jetson Nano, Libre Computer Board ALL-H3-CC, Nano Pi R4S, Pine64, Pine64 Rock64, Radxa ROCK Pi 4C, Radxa ROCK 4C+, Radxa ROCK 5B, Raspberry Pi 4B, Raspberry Pi Compute Module 4, Turing RK1, Orange Pi 5 |
| - local | Docker, QEMU | Docker, QEMU |
| **Omni** | | |
| [Omni](https://github.com/siderolabs/omni) | >= 1.1.0 | >= 1.1.0 |
| **Cluster API** | | |
| [CAPI Bootstrap Provider Talos](https://github.com/siderolabs/cluster-api-bootstrap-provider-talos) | >= 0.6.9 | >= 0.6.8 |
| [CAPI Control Plane Provider Talos](https://github.com/siderolabs/cluster-api-control-plane-provider-talos) | >= 0.5.10 | >= 0.5.9 |
| [Sidero](https://www.sidero.dev/) | >= 0.6.7 | >= 0.6.6 |

## Platform Tiers

* Tier 1: Automated tests, high-priority fixes.
* Tier 2: Tested from time to time, medium-priority bugfixes.
* Tier 3: Not tested by core Talos team, community tested.

### Tier 1

* Metal
* AWS
* GCP

### Tier 2

* Azure
* Digital Ocean
* OpenStack
* VMware

### Tier 3

* Akamai
* CloudStack
* Exoscale
* Hetzner
* nocloud
* OpenNebula
* Oracle Cloud
* Scaleway
* Vultr
* Upcloud
71
website/content/v1.12/introduction/system-requirements.md
Normal file

@ -0,0 +1,71 @@
---
title: System Requirements
weight: 40
description: "Hardware requirements for running Talos Linux."
---

## Minimum Requirements

<table class="table-auto">
  <thead>
    <tr>
      <th class="px-4 py-2">Role</th>
      <th class="px-4 py-2">Memory</th>
      <th class="px-4 py-2">Cores</th>
      <th class="px-4 py-2">System Disk</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td class="border px-4 py-2">Control Plane</td>
      <td class="border px-4 py-2">2 GiB</td>
      <td class="border px-4 py-2">2</td>
      <td class="border px-4 py-2">10 GiB</td>
    </tr>
    <tr class="bg-gray-100">
      <td class="border px-4 py-2">Worker</td>
      <td class="border px-4 py-2">1 GiB</td>
      <td class="border px-4 py-2">1</td>
      <td class="border px-4 py-2">10 GiB</td>
    </tr>
  </tbody>
</table>

## Recommended

<table class="table-auto">
  <thead>
    <tr>
      <th class="px-4 py-2">Role</th>
      <th class="px-4 py-2">Memory</th>
      <th class="px-4 py-2">Cores</th>
      <th class="px-4 py-2">System Disk</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td class="border px-4 py-2">Control Plane</td>
      <td class="border px-4 py-2">4 GiB</td>
      <td class="border px-4 py-2">4</td>
      <td class="border px-4 py-2">100 GiB</td>
    </tr>
    <tr class="bg-gray-100">
      <td class="border px-4 py-2">Worker</td>
      <td class="border px-4 py-2">2 GiB</td>
      <td class="border px-4 py-2">2</td>
      <td class="border px-4 py-2">100 GiB</td>
    </tr>
  </tbody>
</table>

These requirements are similar to those of Kubernetes.

## Storage

Talos Linux itself requires less than 100 MB of disk space, but the EPHEMERAL partition is used to store pulled images, container work directories, and so on.
Thus a minimum of 10 GiB of disk space is required.
100 GiB is desired.
Note, however, that Talos Linux assumes complete control of the disk it is installed on, so that it can control the partition table for image-based upgrades; you therefore cannot partition the rest of the disk for use by workloads.

Thus it is recommended to install Talos Linux on a small, dedicated disk - using a terabyte-sized SSD for the Talos install disk would be wasteful.
Sidero Labs recommends having separate disks (apart from the Talos install disk) to be used for storage.
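The install disk is selected via the machine configuration; a minimal sketch (the device name is illustrative):

```yaml
machine:
  install:
    disk: /dev/sda
```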
432
website/content/v1.12/introduction/troubleshooting.md
Normal file

@ -0,0 +1,432 @@
---
title: "Troubleshooting"
description: "Troubleshoot control plane and other failures for Talos Linux clusters."
aliases:
  - ../guides/troubleshooting-control-plane
  - ../advanced/troubleshooting-control-plane
---

<!-- markdownlint-disable MD026 -->

In this guide we assume that Talos is configured with default features enabled, such as [Discovery Service]({{< relref "../talos-guides/discovery" >}}) and [KubePrism]({{< relref "../kubernetes-guides/configuration/kubeprism" >}}).
If these features are disabled, some of the troubleshooting steps may not apply or may need to be adjusted.

This guide is structured so that it can be followed step by step; skip sections which are not relevant to your issue.
## Network Configuration

As Talos Linux is an API-based operating system, it is important to have networking configured so that the API can be accessed.
Some information can be gathered from the [Interactive Dashboard]({{< relref "../talos-guides/interactive-dashboard" >}}) which is available on the machine console.

When running in the cloud, networking should be configured automatically, whereas bare-metal installations may need more specific configuration; see the [networking `metal` configuration guide]({{< relref "../talos-guides/install/bare-metal-platforms/network-config" >}}).
## Talos API

The Talos API runs on [port 50000]({{< relref "../learn-more/talos-network-connectivity" >}}).
Control plane nodes should always serve the Talos API, while worker nodes require access to the control plane nodes to issue TLS certificates for the workers.

### Firewall Issues

Make sure that the firewall is not blocking port 50000, and [communication]({{< relref "../learn-more/talos-network-connectivity" >}}) on ports 50000/50001 inside the cluster.

### Client Configuration Issues

Make sure to use the correct `talosconfig` client configuration file matching your cluster.
See [getting started]({{< relref "./getting-started" >}}) for more information.

The most common issue is that `talosctl gen config` writes `talosconfig` to a file in the current directory, while `talosctl` by default picks up the configuration from the default location (`~/.talos/config`).
The path to the configuration file can be specified with the `--talosconfig` flag to `talosctl`.
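For example, to use a freshly generated configuration without merging it (path and IP are illustrative):

```bash
talosctl --talosconfig ./talosconfig --nodes <control-plane-IP> version
```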
### Conflict on Kubernetes and Host Subnets

If `talosctl` returns an error saying that certificate IPs are empty, it might be due to a conflict between Kubernetes and host subnets.
The Talos API runs on the host network, but it automatically excludes Kubernetes pod & service subnets from the usable set of addresses.

Talos default machine configuration specifies the following Kubernetes pod and service IPv4 CIDRs: `10.244.0.0/16` and `10.96.0.0/12`.
If the host network is configured with one of these subnets, change the machine configuration to use a different subnet.

### Wrong Endpoints

The `talosctl` CLI connects to the Talos API via the specified endpoints, which should be a list of control plane machine addresses.
The client automatically retries other endpoints if some of them are unavailable.

Worker nodes should not be used as endpoints, as they are not able to forward requests to other nodes.

The [VIP]({{< relref "../talos-guides/network/vip" >}}) should never be used as a Talos API endpoint.
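Endpoints can be recorded in the client configuration so they don't need to be passed on every call (IPs illustrative):

```bash
talosctl config endpoint 172.20.0.2 172.20.0.3 172.20.0.4
```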
### TCP Load Balancer

When using a TCP load balancer, make sure the load balancer endpoint is included in the `.machine.certSANs` list in the machine configuration.
## System Requirements

If the minimum [system requirements]({{< relref "./system-requirements" >}}) are not met, this might manifest itself in various ways, such as random failures when starting services, or failures to pull images from the container registry.
## Running Health Checks

Talos Linux provides a set of basic health checks with the `talosctl health` command, which can be used to check the health of the cluster.

In the default mode, `talosctl health` uses information from the [discovery]({{< relref "../talos-guides/discovery" >}}) service to get information about cluster members.
This can be overridden with the command line flags `--control-plane-nodes` and `--worker-nodes`.
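A minimal invocation against an explicit member list (IPs illustrative):

```bash
talosctl -n 172.20.0.2 health \
  --control-plane-nodes 172.20.0.2,172.20.0.3,172.20.0.4 \
  --worker-nodes 172.20.0.5,172.20.0.6
```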
## Gathering Logs

While the logs and state of the system can be queried via the Talos API, it is often useful to gather the logs from all nodes in the cluster and analyze them offline.
The `talosctl support` command can be used to gather logs and other information from the nodes specified with the `--nodes` flag (multiple nodes are supported).
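For example, to collect a support bundle from two nodes (IPs illustrative):

```bash
talosctl support --nodes 172.20.0.2,172.20.0.3
```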
## Discovery and Cluster Membership

Talos Linux uses the [Discovery Service]({{< relref "../talos-guides/discovery" >}}) to discover other nodes in the cluster.

The list of members on each machine should be consistent: `talosctl -n <IP> get members`.

### Some Members are Missing

Ensure connectivity to the discovery service (default is `discovery.talos.dev:443`), and that the discovery registry is not disabled.

### Duplicate Members

Don't use the same base secrets to generate machine configuration for multiple clusters, as some secrets are used to identify members of the same cluster.
If the same machine configuration (or secrets) is used to repeatedly create and destroy clusters, the discovery service will see the same nodes as members of different clusters.

### Removed Members are Still Present

Talos Linux removes itself from the discovery service when it is [reset]({{< relref "../talos-guides/resetting-a-machine" >}}).
If the machine was not reset, it might show up as a member of the cluster for the maximum TTL of the discovery service (30 minutes), and after that it will be automatically removed.
## `etcd` Issues

`etcd` is the distributed key-value store used by Kubernetes to store its state.
Talos Linux provides automation to manage `etcd` members running on control plane nodes.
If `etcd` is not healthy, the Kubernetes API server will not be able to function correctly.

It is always recommended to run an odd number of `etcd` members: a cluster of 3 or more members can tolerate the failure of a minority of its members without losing quorum.

Common troubleshooting steps:

- check `etcd` service state with `talosctl -n IP service etcd` for each control plane node
- check `etcd` membership on each control plane node with `talosctl -n IP etcd members`
- check `etcd` logs with `talosctl -n IP logs etcd`
- check `etcd` alarms with `talosctl -n IP etcd alarm list`

### All `etcd` Services are Stuck in `Pre` State

Make sure that a single member was [bootstrapped]({{< relref "./getting-started#kubernetes-bootstrap" >}}).

Check that the machine is able to pull the `etcd` container image: check `talosctl dmesg` for messages starting with the `retrying:` prefix.

### Some `etcd` Services are Stuck in `Pre` State

Make sure traffic is not blocked on port 2380 between control plane nodes.

Check that `etcd` quorum is not lost.

Check that all control plane nodes are reported in `talosctl get members` output.

### `etcd` Reports an Alarm

See the [etcd maintenance]({{< relref "../advanced/etcd-maintenance" >}}) guide.

### `etcd` Quorum is Lost

See the [disaster recovery]({{< relref "../advanced/disaster-recovery" >}}) guide.

### Other Issues

`etcd` will only run on control plane nodes.
If a node is designated as a worker node, you should not expect `etcd` to be running on it.

When a node boots for the first time, the `etcd` data directory (`/var/lib/etcd`) is empty, and it will only be populated when `etcd` is launched.

If the `etcd` service is crashing and restarting, check its logs with `talosctl -n <IP> logs etcd`.
The most common reasons for crashes are:

- wrong arguments passed via `extraArgs` in the configuration;
- booting Talos on a non-empty disk with an existing Talos installation, so that `/var/lib/etcd` contains data from the old cluster.
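In the latter case, the machine can be wiped and re-joined to the correct cluster; a sketch (flags may vary by `talosctl` version):

```bash
talosctl -n <IP> reset --graceful=false --reboot
```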
## `kubelet` and Kubernetes Node Issues

The `kubelet` service should be running on all Talos nodes, and it is responsible for running Kubernetes pods and static pods (including control plane components), and for registering the node with the Kubernetes API server.

If the `kubelet` doesn't run on a control plane node, it will block the control plane components from starting.

The node will not be registered in Kubernetes until the Kubernetes API server is up and the initial Kubernetes manifests are applied.

### `kubelet` is not running

Check that the `kubelet` image is available (`talosctl image ls --namespace system`).

Check `kubelet` logs with `talosctl -n IP logs kubelet` for startup errors:

- make sure the Kubernetes version is [supported]({{< relref "./support-matrix" >}}) with this Talos release
- make sure the `kubelet` extra arguments and extra configuration supplied with the Talos machine configuration are valid

### Talos Complains about Node Not Found

The `kubelet` hasn't yet registered the node with the Kubernetes API server; this is expected during initial cluster bootstrap, and the error will go away.
If the message persists, check Kubernetes API health.

The Kubernetes controller manager (`kube-controller-manager`) is responsible for monitoring the certificate signing requests (CSRs) and issuing certificates for each of them.
The `kubelet` is responsible for generating and submitting the CSRs for its associated node.

The state of any CSRs can be checked with `kubectl get csr`:

```bash
$ kubectl get csr
NAME        AGE   SIGNERNAME                                    REQUESTOR                 CONDITION
csr-jcn9j   14m   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:q9pyzr   Approved,Issued
csr-p6b9q   14m   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:q9pyzr   Approved,Issued
csr-sw6rm   14m   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:q9pyzr   Approved,Issued
csr-vlghg   14m   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:q9pyzr   Approved,Issued
```
### `kubectl get nodes` Reports Wrong Internal IP

Configure the correct internal IP address with [`.machine.kubelet.nodeIP`]({{< relref "../reference/configuration/v1alpha1/config#Config.machine.kubelet.nodeIP" >}}).
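A sketch of the relevant machine configuration (the subnet is an example):

```yaml
machine:
  kubelet:
    nodeIP:
      validSubnets:
        - 10.0.0.0/8
```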
### `kubectl get nodes` Reports Wrong External IP

Talos Linux doesn't manage the external IP; it is managed by the Kubernetes Cloud Controller Manager.

### `kubectl get nodes` Reports Wrong Node Name

By default, the Kubernetes node name is derived from the hostname.
Update the hostname using the machine configuration, cloud configuration, or via the DHCP server.

### Node Is Not Ready

A Node in Kubernetes is marked as `Ready` only once its CNI is up.
It takes a minute or two for the CNI images to be pulled and for the CNI to start.
If the node is stuck in this state for too long, check CNI pods and logs with `kubectl`.
Usually, CNI-related resources are created in the `kube-system` namespace.

For example, for the default Talos Flannel CNI:

```bash
$ kubectl -n kube-system get pods
NAME                 READY   STATUS    RESTARTS   AGE
...
kube-flannel-25drx   1/1     Running   0          23m
kube-flannel-8lmb6   1/1     Running   0          23m
kube-flannel-gl7nx   1/1     Running   0          23m
kube-flannel-jknt9   1/1     Running   0          23m
...
```

### Duplicate/Stale Nodes

Talos Linux doesn't remove Kubernetes nodes automatically, so if a node is removed from the cluster, it will still be present in Kubernetes.
Remove the node from Kubernetes with `kubectl delete node <node-name>`.
### Talos Complains about Certificate Errors on `kubelet` API

This error might appear during initial cluster bootstrap, and it will go away once the Kubernetes API server is up and the node is registered.

An example of the Talos logs:

```bash
[talos] controller failed {"component": "controller-runtime", "controller": "k8s.KubeletStaticPodController", "error": "error refreshing pod status: error fetching pod status: Get \"https://127.0.0.1:10250/pods/?timeout=30s\": remote error: tls: internal error"}
```

With the default configuration, `kubelet` issues a self-signed server certificate, but when the `rotate-server-certificates` feature is enabled, `kubelet` issues its certificate using `kube-apiserver`.
Make sure the `kubelet` CSR is approved by the Kubernetes API server.
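Pending CSRs can be listed and approved manually (the CSR name is illustrative):

```bash
kubectl get csr
kubectl certificate approve csr-vlghg
```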
In either case, this error is not critical, as it only affects reporting of the pod status to Talos Linux.
## Kubernetes Control Plane

The Kubernetes control plane consists of the following components:

- `kube-apiserver` - the Kubernetes API server
- `kube-controller-manager` - the Kubernetes controller manager
- `kube-scheduler` - the Kubernetes scheduler

Optionally, `kube-proxy` runs as a DaemonSet to provide pod-to-service communication.

`coredns` provides name resolution for the cluster.

CNI is not part of the control plane, but it is required for Kubernetes pods using pod networking.

Troubleshooting should always start with `kube-apiserver`, and then proceed to other components.

Talos Linux configures `kube-apiserver` to talk to the `etcd` running on the same node, so `etcd` must be healthy before `kube-apiserver` can start.
The `kube-controller-manager` and `kube-scheduler` are configured to talk to the `kube-apiserver` on the same node, so they will not start until `kube-apiserver` is healthy.

### Control Plane Static Pods

Talos should generate the static pod definitions for the Kubernetes control plane as resources:

```bash
$ talosctl -n <IP> get staticpods
NODE         NAMESPACE   TYPE        ID                        VERSION
172.20.0.2   k8s         StaticPod   kube-apiserver            1
172.20.0.2   k8s         StaticPod   kube-controller-manager   1
172.20.0.2   k8s         StaticPod   kube-scheduler            1
```

Talos should report that the static pod definitions are rendered for the `kubelet`:

```bash
$ talosctl -n <IP> dmesg | grep 'rendered new'
172.20.0.2: user: warning: [2023-04-26T19:17:52.550527204Z]: [talos] rendered new static pod {"component": "controller-runtime", "controller": "k8s.StaticPodServerController", "id": "kube-apiserver"}
172.20.0.2: user: warning: [2023-04-26T19:17:52.552186204Z]: [talos] rendered new static pod {"component": "controller-runtime", "controller": "k8s.StaticPodServerController", "id": "kube-controller-manager"}
172.20.0.2: user: warning: [2023-04-26T19:17:52.554607204Z]: [talos] rendered new static pod {"component": "controller-runtime", "controller": "k8s.StaticPodServerController", "id": "kube-scheduler"}
```

If the static pod definitions are not rendered, check `etcd` and `kubelet` service health (see above) and the controller runtime logs (`talosctl logs controller-runtime`).
### Control Plane Pod Status

Initially the `kube-apiserver` component will not be running, and it takes some time before it becomes fully up during bootstrap (the image should be pulled from the Internet, etc.).

The status of the control plane components on each of the control plane nodes can be checked with `talosctl containers -k`:

```bash
$ talosctl -n <IP> containers --kubernetes
NODE         NAMESPACE   ID                                                                                       IMAGE                                                 PID    STATUS
172.20.0.2   k8s.io      kube-system/kube-apiserver-talos-default-controlplane-1                                  registry.k8s.io/pause:3.2                             2539   SANDBOX_READY
172.20.0.2   k8s.io      └─ kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver:51c3aad7a271   registry.k8s.io/kube-apiserver:v{{< k8s_release >}}   2572   CONTAINER_RUNNING
```

The logs of the control plane components can be checked with `talosctl logs --kubernetes` (or with `-k` as a shorthand):

```bash
talosctl -n <IP> logs -k kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver:51c3aad7a271
```

If a control plane component reports an error on startup, check that:

- the Kubernetes version is [supported]({{< relref "./support-matrix" >}}) with this Talos release
- the extra arguments and extra configuration supplied with the Talos machine configuration are valid
### Kubernetes Bootstrap Manifests

As part of the bootstrap process, Talos injects bootstrap manifests into the Kubernetes API server.
There are two kinds of these manifests: system manifests built into Talos, and extra manifests downloaded from external sources (custom CNI, extra manifests in the machine config):

```bash
$ talosctl -n <IP> get manifests
NODE         NAMESPACE      TYPE       ID                                                         VERSION
172.20.0.2   controlplane   Manifest   00-kubelet-bootstrapping-token                             1
172.20.0.2   controlplane   Manifest   01-csr-approver-role-binding                               1
172.20.0.2   controlplane   Manifest   01-csr-node-bootstrap                                      1
172.20.0.2   controlplane   Manifest   01-csr-renewal-role-binding                                1
172.20.0.2   controlplane   Manifest   02-kube-system-sa-role-binding                             1
172.20.0.2   controlplane   Manifest   03-default-pod-security-policy                             1
172.20.0.2   controlplane   Manifest   05-https://docs.projectcalico.org/manifests/calico.yaml   1
172.20.0.2   controlplane   Manifest   10-kube-proxy                                              1
172.20.0.2   controlplane   Manifest   11-core-dns                                                1
172.20.0.2   controlplane   Manifest   11-core-dns-svc                                            1
172.20.0.2   controlplane   Manifest   11-kube-config-in-cluster                                  1
```

Details of each manifest can be queried by adding `-o yaml`:

```bash
$ talosctl -n <IP> get manifests 01-csr-approver-role-binding --namespace=controlplane -o yaml
node: 172.20.0.2
metadata:
    namespace: controlplane
    type: Manifests.kubernetes.talos.dev
    id: 01-csr-approver-role-binding
    version: 1
    phase: running
spec:
    - apiVersion: rbac.authorization.k8s.io/v1
      kind: ClusterRoleBinding
      metadata:
        name: system-bootstrap-approve-node-client-csr
      roleRef:
        apiGroup: rbac.authorization.k8s.io
        kind: ClusterRole
        name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
      subjects:
        - apiGroup: rbac.authorization.k8s.io
          kind: Group
          name: system:bootstrappers
```

### Other Control Plane Components

Once the Kubernetes API server is up, issues with other control plane components can be investigated with `kubectl`:

```shell
kubectl get nodes -o wide
kubectl get pods -o wide --all-namespaces
kubectl describe pod -n NAMESPACE POD
kubectl logs -n NAMESPACE POD
```
## Kubernetes API

The Kubernetes API client configuration (`kubeconfig`) can be retrieved using the Talos API with the `talosctl -n <IP> kubeconfig` command.
Talos Linux mostly doesn't depend on the Kubernetes API endpoint for the cluster, but the Kubernetes API endpoint should be configured correctly for external access to the cluster.

### Kubernetes Control Plane Endpoint

The Kubernetes control plane endpoint is the single canonical URL by which the Kubernetes API is accessed.
Especially with high-availability (HA) control planes, this endpoint may point to a load balancer or a DNS name which may have multiple `A` and `AAAA` records.

Like Talos' own API, the Kubernetes API uses mutual TLS, client certs, and a common Certificate Authority (CA).
Unlike general-purpose websites, there is no need for an upstream CA, so tools such as cert-manager and Let's Encrypt, or products such as commercially validated TLS certificates, are not required.
Encryption, however, _is_ required, and hence the URL scheme will always be `https://`.

By default, the Kubernetes API server in Talos runs on port 6443.
As such, the control plane endpoint URLs for Talos will almost always be of the form `https://endpoint:6443`.
(The port is required, since it is not the `https` default of `443`.)
The `endpoint` above may be a DNS name or IP address, but it should be directed to the _set_ of all control plane nodes, as opposed to a single one.

As mentioned above, this can be achieved by a number of strategies, including:

- an external load balancer
- DNS records
- Talos-builtin shared IP ([VIP]({{< relref "../talos-guides/network/vip" >}}))
- BGP peering of a shared IP (such as with [kube-vip](https://kube-vip.io))
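Whichever strategy is used, the endpoint is recorded in the machine configuration; a minimal sketch (the URL is illustrative):

```yaml
cluster:
  controlPlane:
    endpoint: https://kube.example.com:6443
```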
Using a DNS name here is a good idea, since it allows any other option, while offering a layer of abstraction.
It allows the underlying IP addresses to change without impacting the canonical URL.

Unlike most services in Kubernetes, the API server runs with host networking, meaning that it shares the network namespace with the host.
This means you can use the IP address(es) of the host to refer to the Kubernetes API server.

For availability of the API, it is important that any load balancer be aware of the health of the backend API servers, to minimize disruptions during common node operations like reboots and upgrades.

## Miscellaneous

### Checking Controller Runtime Logs

Talos runs a set of [controllers]({{< relref "../learn-more/controllers-resources" >}}) which operate on resources to build and support machine operations.

Some debugging information can be queried from the controller logs with `talosctl logs controller-runtime`:

```bash
talosctl -n <IP> logs controller-runtime
```

Controllers continuously run a reconcile loop, so at any time, they may be starting, failing, or restarting.
This is expected behavior.

If there are no new messages in the `controller-runtime` log, it means that the controllers have successfully finished reconciling, and that the current system state is the desired system state.
9
website/content/v1.12/introduction/what-is-new/index.md
Normal file

@ -0,0 +1,9 @@
---
title: What's New in Talos 1.12.0
weight: 50
description: "Discover the latest features and updates in Talos Linux 1.12."
---

For critical changes, refer to the [upgrade notes]({{< relref "../talos-guides/upgrading-talos" >}}).

TBD
28
website/content/v1.12/introduction/what-is-talos.md
Normal file

@ -0,0 +1,28 @@
---
title: What is Talos?
weight: 10
description: "A quick introduction to what Talos is and why it should be used."
---

Talos is a container-optimized Linux distro; a reimagining of Linux for distributed systems such as Kubernetes.
It is designed to be as minimal as possible while still maintaining practicality.
For these reasons, Talos has a number of features unique to it:

- it is immutable
- it is atomic
- it is ephemeral
- it is minimal
- it is secure by default
- it is managed via a single declarative configuration file and gRPC API

Talos can be deployed on container, cloud, virtualized, and bare metal platforms.

## Why Talos

In having less, Talos offers more.
Security.
Efficiency.
Resiliency.
Consistency.

All of these areas are improved simply by having less.
5
website/content/v1.12/kubernetes-guides/_index.md
Normal file

@ -0,0 +1,5 @@
---
title: "Kubernetes Guides"
weight: 30
description: "Management of a Kubernetes Cluster hosted by Talos Linux"
---

@ -0,0 +1,5 @@

---
title: "Configuration"
weight: 10
description: "How to configure components of the Kubernetes cluster itself."
---
@ -0,0 +1,285 @@

---
title: "Ceph Storage cluster with Rook"
description: "Guide on how to create a simple Ceph storage cluster with Rook for Kubernetes"
aliases:
  - ../../guides/configuring-ceph-with-rook
---

## Preparation

Talos Linux reserves an entire disk for the OS installation, so machines with multiple available disks are needed for a reliable Ceph cluster with Rook and Talos Linux.
Rook requires that the block devices or partitions used by Ceph have no partitions or formatted filesystems before use.
Rook also requires a minimum Kubernetes version of `v1.16` and Helm `v3.0` for installation of charts.
It is highly recommended that the [Rook Ceph overview](https://rook.io/docs/rook/v1.8/ceph-storage.html) be read and understood before deploying a Ceph cluster with Rook.
## Installation

Creating a Ceph cluster with Rook requires two steps: first, the Rook Operator needs to be installed, which can be done with a Helm Chart.
The example below installs the Rook Operator into the `rook-ceph` namespace, which is the default for a Ceph cluster with Rook.

```shell
$ helm repo add rook-release https://charts.rook.io/release
"rook-release" has been added to your repositories

$ helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph
W0327 17:52:44.277830   54987 warnings.go:70] policy/v1beta1 PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+
W0327 17:52:44.612243   54987 warnings.go:70] policy/v1beta1 PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+
NAME: rook-ceph
LAST DEPLOYED: Sun Mar 27 17:52:42 2022
NAMESPACE: rook-ceph
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
The Rook Operator has been installed. Check its status by running:
  kubectl --namespace rook-ceph get pods -l "app=rook-ceph-operator"

Visit https://rook.io/docs/rook/latest for instructions on how to create and configure Rook clusters

Important Notes:
- You must customize the 'CephCluster' resource in the sample manifests for your cluster.
- Each CephCluster must be deployed to its own namespace, the samples use `rook-ceph` for the namespace.
- The sample manifests assume you also installed the rook-ceph operator in the `rook-ceph` namespace.
- The helm chart includes all the RBAC required to create a CephCluster CRD in the same namespace.
- Any disk devices you add to the cluster in the 'CephCluster' must be empty (no filesystem and no partitions).
```

The default PodSecurity configuration prevents execution of privileged pods.
Adding a label to the namespace will allow Ceph to start.

```shell
kubectl label namespace rook-ceph pod-security.kubernetes.io/enforce=privileged
```
Once that is complete, the Ceph cluster can be installed with the official Helm Chart.
The Chart can be installed with default values, which will attempt to use all nodes in the Kubernetes cluster, and all unused disks on each node, for Ceph storage, and make available block storage, object storage, as well as a shared filesystem.
Generally more specific node/device/cluster configuration is used, and the [Rook documentation](https://rook.io/docs/rook/v1.8/ceph-cluster-crd.html) explains all the available options in detail.
For this example the defaults will be adequate.

```shell
$ helm install --create-namespace --namespace rook-ceph rook-ceph-cluster --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster
NAME: rook-ceph-cluster
LAST DEPLOYED: Sun Mar 27 18:12:46 2022
NAMESPACE: rook-ceph
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
The Ceph Cluster has been installed. Check its status by running:
  kubectl --namespace rook-ceph get cephcluster

Visit https://rook.github.io/docs/rook/latest/ceph-cluster-crd.html for more information about the Ceph CRD.

Important Notes:
- You can only deploy a single cluster per namespace
- If you wish to delete this cluster and start fresh, you will also have to wipe the OSD disks using `sfdisk`
```

Now that the Ceph cluster configuration has been created, the Rook operator needs time to install the Ceph cluster and bring all the components online.
The progression of the Ceph cluster state can be followed with the following command.

```shell
$ watch kubectl --namespace rook-ceph get cephcluster rook-ceph
Every 2.0s: kubectl --namespace rook-ceph get cephcluster rook-ceph

NAME        DATADIRHOSTPATH   MONCOUNT   AGE   PHASE         MESSAGE                 HEALTH   EXTERNAL
rook-ceph   /var/lib/rook     3          57s   Progressing   Configuring Ceph Mons
```

Depending on the size of the Ceph cluster and the availability of resources, the Ceph cluster should become available, and with it the storage classes that can be used with Kubernetes Persistent Volumes.

```shell
$ kubectl --namespace rook-ceph get cephcluster rook-ceph
NAME        DATADIRHOSTPATH   MONCOUNT   AGE   PHASE   MESSAGE                        HEALTH      EXTERNAL
rook-ceph   /var/lib/rook     3          40m   Ready   Cluster created successfully   HEALTH_OK

$ kubectl get storageclass
NAME                   PROVISIONER                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
ceph-block (default)   rook-ceph.rbd.csi.ceph.com      Delete          Immediate           true                   77m
ceph-bucket            rook-ceph.ceph.rook.io/bucket   Delete          Immediate           false                  77m
ceph-filesystem        rook-ceph.cephfs.csi.ceph.com   Delete          Immediate           true                   77m
```
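As a quick smoke test, a PersistentVolumeClaim against the default `ceph-block` class can be created (name and size are illustrative):

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ceph-block-test
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: ceph-block
  resources:
    requests:
      storage: 1Gi
```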
## Talos Linux Considerations

By default, Rook configures Ceph to have 3 `mon` instances, in which case the data stored in `dataDirHostPath` can be regenerated from the other `mon` instances.
So when performing maintenance on a Talos Linux node with a Rook Ceph cluster (e.g. upgrading the Talos Linux version), it is imperative that care be taken to maintain the health of the Ceph cluster.
Before upgrading, you should always check the health status of the Ceph cluster to ensure that it is healthy.

```shell
$ kubectl --namespace rook-ceph get cephclusters.ceph.rook.io rook-ceph
NAME        DATADIRHOSTPATH   MONCOUNT   AGE   PHASE   MESSAGE                        HEALTH      EXTERNAL
rook-ceph   /var/lib/rook     3          98m   Ready   Cluster created successfully   HEALTH_OK
```

If it is, you can begin the upgrade process for the Talos Linux node, during which time the Ceph cluster will become unhealthy as the node is reconfigured.
Before performing any other action on the Talos Linux nodes, the Ceph cluster must return to a healthy status.

```shell
$ talosctl upgrade --nodes 172.20.15.5 --image ghcr.io/talos-systems/installer:v0.14.3
NODE          ACK                        STARTED
172.20.15.5   Upgrade request received   2022-03-27 20:29:55.292432887 +0200 CEST m=+10.050399758

$ kubectl --namespace rook-ceph get cephclusters.ceph.rook.io
NAME        DATADIRHOSTPATH   MONCOUNT   AGE   PHASE         MESSAGE                   HEALTH        EXTERNAL
rook-ceph   /var/lib/rook     3          99m   Progressing   Configuring Ceph Mgr(s)   HEALTH_WARN

$ kubectl --namespace rook-ceph wait --timeout=1800s --for=jsonpath='{.status.ceph.health}=HEALTH_OK' cephclusters.ceph.rook.io rook-ceph
cephcluster.ceph.rook.io/rook-ceph condition met
```

The above steps need to be performed for each Talos Linux node undergoing maintenance, one at a time.
## Cleaning Up

### Rook Ceph Cluster Removal

Removing a Rook Ceph cluster requires a few steps, starting with signalling to Rook that the Ceph cluster is really being destroyed.
Then all Persistent Volumes (and Claims) backed by the Ceph cluster must be deleted, followed by the Storage Classes and the Ceph storage types.

```shell
$ kubectl --namespace rook-ceph patch cephcluster rook-ceph --type merge -p '{"spec":{"cleanupPolicy":{"confirmation":"yes-really-destroy-data"}}}'
cephcluster.ceph.rook.io/rook-ceph patched

$ kubectl delete storageclasses ceph-block ceph-bucket ceph-filesystem
storageclass.storage.k8s.io "ceph-block" deleted
storageclass.storage.k8s.io "ceph-bucket" deleted
storageclass.storage.k8s.io "ceph-filesystem" deleted

$ kubectl --namespace rook-ceph delete cephblockpools ceph-blockpool
cephblockpool.ceph.rook.io "ceph-blockpool" deleted

$ kubectl --namespace rook-ceph delete cephobjectstore ceph-objectstore
cephobjectstore.ceph.rook.io "ceph-objectstore" deleted

$ kubectl --namespace rook-ceph delete cephfilesystem ceph-filesystem
cephfilesystem.ceph.rook.io "ceph-filesystem" deleted
```

Once that is complete, the Ceph cluster itself can be removed, along with the Rook Ceph cluster Helm chart installation.

```shell
$ kubectl --namespace rook-ceph delete cephcluster rook-ceph
cephcluster.ceph.rook.io "rook-ceph" deleted

$ helm --namespace rook-ceph uninstall rook-ceph-cluster
release "rook-ceph-cluster" uninstalled
```

If needed, the Rook Operator can also be removed along with all the Custom Resource Definitions that it created.

```shell
$ helm --namespace rook-ceph uninstall rook-ceph
W0328 12:41:14.998307  147203 warnings.go:70] policy/v1beta1 PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+
These resources were kept due to the resource policy:
[CustomResourceDefinition] cephblockpools.ceph.rook.io
[CustomResourceDefinition] cephbucketnotifications.ceph.rook.io
[CustomResourceDefinition] cephbuckettopics.ceph.rook.io
[CustomResourceDefinition] cephclients.ceph.rook.io
[CustomResourceDefinition] cephclusters.ceph.rook.io
[CustomResourceDefinition] cephfilesystemmirrors.ceph.rook.io
[CustomResourceDefinition] cephfilesystems.ceph.rook.io
[CustomResourceDefinition] cephfilesystemsubvolumegroups.ceph.rook.io
[CustomResourceDefinition] cephnfses.ceph.rook.io
[CustomResourceDefinition] cephobjectrealms.ceph.rook.io
[CustomResourceDefinition] cephobjectstores.ceph.rook.io
[CustomResourceDefinition] cephobjectstoreusers.ceph.rook.io
[CustomResourceDefinition] cephobjectzonegroups.ceph.rook.io
[CustomResourceDefinition] cephobjectzones.ceph.rook.io
[CustomResourceDefinition] cephrbdmirrors.ceph.rook.io
[CustomResourceDefinition] objectbucketclaims.objectbucket.io
[CustomResourceDefinition] objectbuckets.objectbucket.io

release "rook-ceph" uninstalled

$ kubectl delete crds cephblockpools.ceph.rook.io cephbucketnotifications.ceph.rook.io cephbuckettopics.ceph.rook.io \
    cephclients.ceph.rook.io cephclusters.ceph.rook.io cephfilesystemmirrors.ceph.rook.io \
    cephfilesystems.ceph.rook.io cephfilesystemsubvolumegroups.ceph.rook.io \
    cephnfses.ceph.rook.io cephobjectrealms.ceph.rook.io cephobjectstores.ceph.rook.io \
    cephobjectstoreusers.ceph.rook.io cephobjectzonegroups.ceph.rook.io cephobjectzones.ceph.rook.io \
    cephrbdmirrors.ceph.rook.io objectbucketclaims.objectbucket.io objectbuckets.objectbucket.io
customresourcedefinition.apiextensions.k8s.io "cephblockpools.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephbucketnotifications.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephbuckettopics.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephclients.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephclusters.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephfilesystemmirrors.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephfilesystems.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephfilesystemsubvolumegroups.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephnfses.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephobjectrealms.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephobjectstores.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephobjectstoreusers.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephobjectzonegroups.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephobjectzones.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "cephrbdmirrors.ceph.rook.io" deleted
customresourcedefinition.apiextensions.k8s.io "objectbucketclaims.objectbucket.io" deleted
customresourcedefinition.apiextensions.k8s.io "objectbuckets.objectbucket.io" deleted
```
### Talos Linux Rook Metadata Removal

If the Rook Operator is cleanly removed following the above process, the node metadata and disks should be clean and ready to be re-used.
In the case of an unclean cluster removal, there may still be a few instances of metadata stored on the system disk, as well as the partition information on the storage disks.
First the node metadata needs to be removed; make sure to update `nodeName` with the actual name of a storage node that needs cleaning, and `path` with the Rook configuration `dataDirHostPath` (this is `/var/lib/rook` when using the default `values.yaml`) set when installing the chart.
The following will need to be repeated for each node used in the Rook Ceph cluster.

```shell
$ cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: disk-clean
spec:
  restartPolicy: Never
  nodeName: <storage-node-name>
  volumes:
    - name: rook-data-dir
      hostPath:
        path: <dataDirHostPath>
  containers:
    - name: disk-clean
      image: busybox
      securityContext:
        privileged: true
      volumeMounts:
        - name: rook-data-dir
          mountPath: /node/rook-data
      command: ["/bin/sh", "-c", "rm -rf /node/rook-data/*"]
EOF
pod/disk-clean created

$ kubectl wait --timeout=900s --for=jsonpath='{.status.phase}=Succeeded' pod disk-clean
pod/disk-clean condition met

$ kubectl delete pod disk-clean
pod "disk-clean" deleted
```

Lastly, the disks themselves need the partition and filesystem data wiped before they can be reused.
Again, the following has to be repeated for each node **and** disk used in the Rook Ceph cluster, updating `nodeName` and `of=` in the `command` as needed.

```shell
$ cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: disk-wipe
spec:
  restartPolicy: Never
  nodeName: <storage-node-name>
  containers:
    - name: disk-wipe
      image: busybox
      securityContext:
        privileged: true
      command: ["/bin/sh", "-c", "dd if=/dev/zero bs=1M count=100 oflag=direct of=<device>"]
EOF
pod/disk-wipe created

$ kubectl wait --timeout=900s --for=jsonpath='{.status.phase}=Succeeded' pod disk-wipe
pod/disk-wipe condition met

$ kubectl delete pod disk-wipe
pod "disk-wipe" deleted
```
@ -0,0 +1,45 @@

---
title: "Deploying Metrics Server"
description: "In this guide you will learn how to set up metrics-server."
aliases:
  - ../../guides/deploy-metrics-server
---

Metrics Server enables use of the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) and [Vertical Pod Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler).
It does this by gathering metrics data from the kubelets in a cluster.
By default, the certificates in use by the kubelets will not be recognized by metrics-server.
This can be solved by either configuring metrics-server to do no validation of the TLS certificates, or by modifying the kubelet configuration to rotate its certificates and use ones that will be recognized by metrics-server.

## Node Configuration

To enable kubelet certificate rotation, all nodes should have the following Machine Config snippet:

```yaml
machine:
  kubelet:
    extraArgs:
      rotate-server-certificates: true
```

## Install During Bootstrap

We will want to ensure that new certificates for the kubelets are approved automatically.
This can easily be done with the [Kubelet Serving Certificate Approver](https://github.com/alex1989hu/kubelet-serving-cert-approver), which will automatically approve the Certificate Signing Requests generated by the kubelets.

We can have Kubelet Serving Certificate Approver and metrics-server installed on the cluster automatically during bootstrap by adding the following snippet to the Cluster Config of the node that will be handling the bootstrap process:

```yaml
cluster:
  extraManifests:
    - https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml
    - https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
```

## Install After Bootstrap

If you choose not to use `extraManifests` to install Kubelet Serving Certificate Approver and metrics-server during bootstrap, you can install them once the cluster is online using `kubectl`:

```sh
kubectl apply -f https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
```
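Once the metrics-server pod is running, verify that metrics are being collected:

```sh
kubectl top nodes
```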
@ -0,0 +1,145 @@

---
title: "Device Plugins"
description: "In this guide you will learn how to expose host devices to the Kubernetes pods."
---

[Kubernetes Device Plugins](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/) can be used to expose host devices to the Kubernetes pods.
This guide will show you how to deploy a device plugin to your Talos cluster.
In this guide, we will use the [Kubernetes Generic Device Plugin](https://github.com/squat/generic-device-plugin), but there are other implementations available.

## Deploying the Device Plugin

The Kubernetes Generic Device Plugin is a DaemonSet that runs on each node in the cluster, exposing the devices to the pods.
The device plugin is configured with a [list of devices to expose](https://github.com/squat/generic-device-plugin#overview), e.g.
`--device='{"name": "video", "groups": [{"paths": [{"path": "/dev/video0"}]}]}'`.

In this guide, we will demonstrate how to deploy the device plugin with a configuration that exposes the `/dev/net/tun` device.
This device is commonly used for user-space Wireguard, including Tailscale.

```yaml
# generic-device-plugin.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: generic-device-plugin
  namespace: kube-system
  labels:
    app.kubernetes.io/name: generic-device-plugin
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: generic-device-plugin
  template:
    metadata:
      labels:
        app.kubernetes.io/name: generic-device-plugin
    spec:
      priorityClassName: system-node-critical
      tolerations:
        - operator: "Exists"
          effect: "NoExecute"
        - operator: "Exists"
          effect: "NoSchedule"
      containers:
        - image: squat/generic-device-plugin
          args:
            - --device
            - |
              name: tun
              groups:
                - count: 1000
                  paths:
                    - path: /dev/net/tun
          name: generic-device-plugin
          resources:
            requests:
              cpu: 50m
              memory: 10Mi
            limits:
              cpu: 50m
              memory: 20Mi
          ports:
            - containerPort: 8080
              name: http
          securityContext:
            privileged: true
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: dev
              mountPath: /dev
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: dev
          hostPath:
            path: /dev
  updateStrategy:
    type: RollingUpdate
```

Apply the manifest to your cluster:

```sh
kubectl apply -f generic-device-plugin.yaml
```

Once the device plugin is deployed, you can verify that the nodes have a new resource: `squat.ai/tun` (the `tun` name comes from the name of the group in the device plugin configuration):

```sh
$ kubectl describe node worker-1
...
Allocated resources:
  Resource      Requests  Limits
  --------      --------  ------
  ...
  squat.ai/tun  0         0
```

## Deploying a Pod with the Device

Now that the device plugin is deployed, you can deploy a pod that requests the device.
The request for the device is specified as a [resource](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) in the pod spec:

```yaml
resources:
  limits:
    squat.ai/tun: "1"
```

Here is an example non-privileged pod spec that requests the `/dev/net/tun` device:

```yaml
# tun-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: tun-test
spec:
  containers:
    - image: alpine
      name: test
      command:
        - sleep
        - inf
      resources:
        limits:
          squat.ai/tun: "1"
      securityContext:
        allowPrivilegeEscalation: false
        capabilities:
          drop:
            - ALL
          add:
            - NET_ADMIN
  dnsPolicy: ClusterFirst
  restartPolicy: Always
```

When running the pod, you should see the `/dev/net/tun` device available:

```sh
$ ls -l /dev/net/tun
crw-rw-rw-    1 root     root       10, 200 Sep 17 10:30 /dev/net/tun
```
@ -0,0 +1,57 @@
|
||||

---
title: "Expose the Etcd Metrics Endpoint"
description: "Learn how to expose the etcd metrics endpoint."
---

To allow monitoring tools to collect metrics from your etcd database, you need to explicitly expose the etcd metrics endpoint.

Here's how to do it:

1. Create a patch file named `etcd-metrics-patch.yaml` that exposes the etcd metrics endpoint on port `2381`, accessible from all network interfaces:

    ```shell
    cat << EOF > etcd-metrics-patch.yaml
    - op: add
      path: /cluster/etcd/extraArgs
      value:
        listen-metrics-urls: http://0.0.0.0:2381
    EOF
    ```

1. Create a `CP_IPS` variable that contains the IP addresses of your control plane nodes:

    ```bash
    CP_IPS="<control-plane-ip-1>,<control-plane-ip-2>,<control-plane-ip-3>"
    ```

1. Ensure you are in your project's directory and apply the `etcd-metrics-patch.yaml` patch to your control plane nodes:

    ```bash
    talosctl patch machineconfig \
      --patch @etcd-metrics-patch.yaml \
      --endpoints $CP_IPS \
      --nodes $CP_IPS \
      --talosconfig=./talosconfig
    ```

    **Note**: You can also [export your `TALOSCONFIG` variable]({{< relref "../../introduction/prodnotes/#step-11-manage-your-talos-configuration-file">}}) and then remove the `--talosconfig=./talosconfig` flag in the patch command above.

1. Reboot the nodes.
    Note that if you have only one control plane node, rebooting it will cause cluster downtime.

    ```bash
    for NODE in $(echo "${CP_IPS}" | tr ',' ' '); do
      echo "Rebooting control plane node: $NODE"
      talosctl reboot --endpoints "$NODE" --nodes "$NODE" --wait
    done
    ```

1. After the node reboots, run the following command to confirm that the etcd metrics endpoint is accessible:

    ```bash
    CP_IP=$(echo $CP_IPS | cut -d',' -f1)
    curl "${CP_IP}:2381/metrics"
    ```
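
    If the endpoint is reachable, etcd returns Prometheus-formatted text.
    The exact set of metrics varies by etcd version, but the output should contain lines similar to this abbreviated sample:

    ```text
    # HELP etcd_server_has_leader Whether or not a leader exists. 1 is existence, 0 is not.
    # TYPE etcd_server_has_leader gauge
    etcd_server_has_leader 1
    ```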

1. Secure your control plane IP addresses to prevent public access.
    See the [Ingress Firewall guide]({{< relref "../../talos-guides/network/ingress-firewall" >}}) for instructions on securing your control plane.

---
title: "inlineManifests and extraManifests"
description: "Learn what inlineManifests and extraManifests are, how they differ, and why they matter."
---

`inlineManifests` and `extraManifests` allow you to automatically apply Kubernetes resources to your cluster during initial bootstrap.

Both are designed to automate the provisioning of components like CNIs and other static infrastructure, but they differ in how the manifest content is sourced and applied.
They are not meant for deploying applications or frequently changing services.
For those, it's better to use a GitOps or CI/CD tool.

## inlineManifests

`inlineManifests` are defined directly within the machine configuration file.
The YAML content is embedded inside the `inlineManifests` section, making it ideal for tightly coupled resources that need to be provisioned as soon as the node boots up.

Here’s an example of how to configure a cluster using an `inlineManifest`:

```yaml
cluster:
  inlineManifests:
    - name: my-app
      contents: |
        apiVersion: apps/v1
        kind: Deployment
        metadata:
          name: my-application
        spec:
          # ... deployment specification
```

## extraManifests

`extraManifests` are Kubernetes manifests fetched from external, unauthenticated HTTP sources such as GitHub, raw file servers, or gists.

You define them in the `extraManifests` section of the machine configuration.
They’re best suited for shared, versioned, or centrally managed resources.

These manifests are pulled directly by the node during configuration.
If the node doesn’t have network access to the HTTP endpoint hosting the manifest, the installation will fail.

Similarly, if the endpoint is down or returns an error, the manifest will not be applied, and the machine configuration will fail as a result.

Here’s how to reference `extraManifests`:

```yaml
cluster:
  extraManifests:
    - "https://raw.githubusercontent.com/example/repo/main/manifest.yaml"
    - "https://gist.githubusercontent.com/user/gist-id/raw/manifest.yaml"
```

## Resource Ordering Considerations

Talos automatically sorts all manifests, including `inlineManifests`, `extraManifests`, and built-in manifests (such as the kubelet bootstrap token and CoreDNS), before applying them in the following order:

1. `Namespace` resources
2. CustomResourceDefinitions (CRDs)
3. All other resources, sorted alphabetically by their `metadata.name` property
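
This means that the order in which you list manifests does not control the order in which resources are applied.
As an illustrative sketch (all names here are hypothetical), a `Namespace` defined in one inline manifest is created before a `Deployment` that targets it, even though the `Deployment` is listed first:

```yaml
cluster:
  inlineManifests:
    - name: my-app # applied after the namespace below
      contents: |
        apiVersion: apps/v1
        kind: Deployment
        metadata:
          name: my-application
          namespace: my-namespace
        spec:
          # ... deployment specification
    - name: my-namespace # a Namespace resource, so it is applied first
      contents: |
        apiVersion: v1
        kind: Namespace
        metadata:
          name: my-namespace
```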

## Example Use Case: Install a GitOps controller with extraManifests

A common use case for `inlineManifests` or `extraManifests` is to install a GitOps controller like Flux or ArgoCD.
Once the controller is running, it connects to your Git repository and automatically applies the rest of your Kubernetes configuration.

Here's how to install the Flux GitOps controller using an `extraManifest`:

1. Create a patch file named `flux-extra-manifest.yaml` that automatically downloads and applies the Flux installation manifest from GitHub:

    ```shell
    cat << EOF > flux-extra-manifest.yaml
    cluster:
      extraManifests:
        - "https://github.com/fluxcd/flux2/releases/latest/download/install.yaml"
    EOF
    ```

1. Create a `CP_IPS` variable that contains the IP addresses of your control plane nodes:

    ```bash
    CP_IPS="<control-plane-ip-1>,<control-plane-ip-2>,<control-plane-ip-3>"
    ```

1. Run this command to export your `TALOSCONFIG` variable.
    You can skip this step if you've already done it:

    ```bash
    mkdir -p ~/.talos
    cp ./talosconfig ~/.talos/config
    ```

1. Apply the `flux-extra-manifest.yaml` patch to your control plane nodes:

    ```bash
    talosctl patch machineconfig \
      --patch @flux-extra-manifest.yaml \
      --endpoints $CP_IPS \
      --nodes $CP_IPS
    ```

1. Wait a few seconds and check for the Flux pods:

    ```bash
    kubectl get pods -n flux-system -w
    ```
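
    If the installation succeeded, you should see the Flux controllers come up; the output will look roughly like this sample (pod name suffixes and timings will differ):

    ```text
    NAME                                       READY   STATUS    RESTARTS   AGE
    helm-controller-5f7457c9dd-abcde           1/1     Running   0          1m
    kustomize-controller-7d6b815d9c-abcde      1/1     Running   0          1m
    notification-controller-55d89dcbf8-abcde   1/1     Running   0          1m
    source-controller-6d5b98c9d9-abcde         1/1     Running   0          1m
    ```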

## Omni Patches

You can also apply `inlineManifests` or `extraManifests` patches to Talos clusters managed by Omni.

Refer to [Create a Patch For Cluster Machines](https://omni.siderolabs.com/how-to-guides/create-a-patch-for-cluster-machines) to learn how to create and apply the patches.

## Summary: inlineManifests vs extraManifests

Here’s a quick overview of the key differences between `inlineManifests` and `extraManifests`:

|                        | `inlineManifests`                              | `extraManifests`                                            |
| ---------------------- | ---------------------------------------------- | ----------------------------------------------------------- |
| Source                 | Defined directly in the machine configuration  | Pulled from external URLs (GitHub, raw file servers, gists) |
| Configuration Location | Under the `inlineManifests` section            | Under the `extraManifests` section                          |
| Use case               | Early bootstrapping of critical resources      | Reusable, version-controlled, or shared manifests           |
| Benefits               | No external dependencies                       | Centrally managed                                           |
| Disadvantages          | Difficult to maintain and format embedded YAML | Requires an external HTTP server                            |

## How Talos Handles Manifest Resources

Talos continuously reconciles manifests on every boot, on every failure to apply, and on every change to the manifests in the machine config.
When processing your `inlineManifests` and `extraManifests`, Talos follows a conservative, additive-only approach.

Here's what that means in practice:

* **Creates missing resources**: If a resource defined in your manifests doesn't exist in the cluster, Talos will create it.

* **Preserves existing resources**: Resources that already exist in the cluster are left completely unchanged, regardless of any differences between the current state and the manifest definition.

* **Never deletes resources**: Talos will not remove resources from the cluster, even if they're no longer present in your manifest configuration.

If you need to reapply or update these manifests after the cluster has been created, use the [Kubernetes upgrade]({{< relref "../../kubernetes-guides/upgrading-kubernetes">}}) process.
Specify the same Kubernetes version that’s currently running in your cluster to ensure that only the manifests are reapplied, without performing a version upgrade.

---
title: "KubePrism"
description: "Enabling in-cluster highly-available controlplane endpoint."
---

Kubernetes pods running in CNI mode can use the `kubernetes.default.svc` service endpoint to access the Kubernetes API server; however, pods running in host networking mode can only use the external cluster endpoint to access the Kubernetes API server.

Because Kubernetes controlplane and CNI components run in host networking mode, they can only use the external cluster endpoint to access the Kubernetes API server.
If the external cluster endpoint is unavailable (due to misconfiguration, network issues, etc.), this will cause issues in the cluster: pods will not be scheduled, service IPs stop working, etc.

KubePrism solves this problem by enabling an in-cluster highly-available controlplane endpoint on every node in the cluster.

## Video Walkthrough

To see a live demo of this writeup, see the video below:

<iframe width="560" height="315" src="https://www.youtube.com/embed/VNRE64R5akM" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

## Enabling KubePrism

> As of Talos 1.6, KubePrism is enabled by default with port 7445.

To enable KubePrism, apply the following machine config patch either during the machine config generation, or to a running cluster (the patch should be applied to all nodes):

```yaml
machine:
  features:
    kubePrism:
      enabled: true
      port: 7445
```

> Note: the `port` specified should be available on every node in the cluster.

## How it works

Talos spins up a TCP loadbalancer on every machine on `localhost` on the specified port, which automatically picks one of the following endpoints:

* the external cluster endpoint as specified in the machine configuration
* for controlplane machines: `https://localhost:<api-server-local-port>` (`https://localhost:6443` in the default configuration)
* `https://<controlplane-address>:<api-server-port>` for every controlplane machine (based on the information from [Cluster Discovery]({{< relref "../../talos-guides/discovery" >}}))

KubePrism automatically filters out unhealthy (or unreachable) endpoints, and prefers lower-latency endpoints over higher-latency endpoints.

Talos automatically reconfigures `kubelet`, `kube-scheduler` and `kube-controller-manager` to use the KubePrism endpoint.
The `kube-proxy` manifest is also reconfigured to use the KubePrism endpoint by default, but when enabling KubePrism for a running cluster the manifest should be updated with the `talosctl upgrade-k8s` command.

When using CNI components that require access to the Kubernetes API server, the KubePrism endpoint should be passed to the CNI configuration (e.g. Cilium, Calico CNIs), as in the sketch below.
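
For example, with Cilium installed via Helm, the KubePrism endpoint is passed with the `k8sServiceHost` and `k8sServicePort` values (an abbreviated sketch; see the Cilium CNI guide for complete install commands):

```bash
# abbreviated: only the KubePrism-related values are shown
helm install cilium cilium/cilium \
    --namespace kube-system \
    --set k8sServiceHost=localhost \
    --set k8sServicePort=7445
```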

## Notes

As the list of endpoints for KubePrism includes the external cluster endpoint, KubePrism in the worst-case scenario will behave the same as the external cluster endpoint.
For controlplane nodes, KubePrism should pick up the `localhost` endpoint of the `kube-apiserver`, minimizing latency.
Worker nodes might use the direct address of a controlplane machine if its latency is lower than the latency of the external cluster endpoint.

The KubePrism listen endpoint is bound to the `localhost` address, so it can't be used outside the cluster.

---
title: "Local Storage"
description: "Using local storage for Kubernetes workloads."
---

Using local storage for Kubernetes workloads implies that the pod will be bound to the node where the local storage is available.
Local storage is not replicated, so in case of a machine failure the contents of the local storage will be lost.

## User Volumes

The simplest way to use local storage is to use [user volumes]({{< relref "../../talos-guides/configuration/disk-management/user" >}}).

Once the user volume is created, it is automatically mounted under the `/var/mnt/<user-volume-name>` path on the node.

For example, create a configuration patch for a user volume named `local-storage`:

```yaml
# local-storage.yaml
apiVersion: v1alpha1
kind: UserVolumeConfig
name: local-storage
provisioning:
  diskSelector:
    match: "!system_disk"
  minSize: 2GB
  maxSize: 2GB
```

Apply the patch to the machine configuration:

```bash
talosctl --nodes <WORKER_IP> patch mc --patch @local-storage.yaml
```

If there is enough space available on a non-system disk (see `diskSelector`), the user volume will be created and mounted under the `/var/mnt/local-storage` path on the node.

```bash
$ talosctl -n <WORKER-IP> get volumestatus u-local-storage
NODE         NAMESPACE   TYPE           ID                VERSION   TYPE        PHASE   LOCATION         SIZE
172.20.0.5   runtime     VolumeStatus   u-local-storage   3         partition   ready   /dev/nvme0n2p1   2.0 GB
$ talosctl -n <WORKER-IP> get mountstatus u-local-storage
NODE         NAMESPACE   TYPE          ID                VERSION   SOURCE           TARGET                   FILESYSTEM   VOLUME
172.20.0.5   runtime     MountStatus   u-local-storage   2         /dev/nvme0n2p1   /var/mnt/local-storage   xfs          u-local-storage
```

Now you can use the `/var/mnt/local-storage` path in your Kubernetes manifests to refer to the local storage:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: local-storage-pod
spec:
  containers:
    - name: local-storage-container
      # ...
      volumeMounts:
        - mountPath: /usr/share
          name: local-storage-volume
  volumes:
    - name: local-storage-volume
      hostPath:
        path: /var/mnt/local-storage
        type: DirectoryOrCreate
```

## Local Path Provisioner

[Local Path Provisioner](https://github.com/rancher/local-path-provisioner) can be used to dynamically provision local storage.

First, we will create a separate [user volume]({{< relref "../../talos-guides/configuration/disk-management/user" >}}) for the Local Path Provisioner to use.
Apply the following machine configuration patch:

> Note: make sure you have [enough space]({{< relref "../../talos-guides/configuration/disk-management/layout" >}}) available to provision the user volume.

```yaml
apiVersion: v1alpha1
kind: UserVolumeConfig
name: local-path-provisioner
provisioning:
  diskSelector:
    match: disk.transport == 'nvme'
  minSize: 200GB
  maxSize: 200GB
```

Make sure to update the Local Path Provisioner configuration to use the user volume path `/var/mnt/local-path-provisioner` as the root path for the local storage.

For example, Local Path Provisioner can be installed using [kustomize](https://kustomize.io/) with the following configuration:

```yaml
# kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - github.com/rancher/local-path-provisioner/deploy?ref=v0.0.31
patches:
  - patch: |-
      kind: ConfigMap
      apiVersion: v1
      metadata:
        name: local-path-config
        namespace: local-path-storage
      data:
        config.json: |-
          {
            "nodePathMap": [
              {
                "node": "DEFAULT_PATH_FOR_NON_LISTED_NODES",
                "paths": ["/var/mnt/local-path-provisioner"]
              }
            ]
          }
  - patch: |-
      apiVersion: storage.k8s.io/v1
      kind: StorageClass
      metadata:
        name: local-path
        annotations:
          storageclass.kubernetes.io/is-default-class: "true"
  - patch: |-
      apiVersion: v1
      kind: Namespace
      metadata:
        name: local-path-storage
        labels:
          pod-security.kubernetes.io/enforce: privileged
```

Put `kustomization.yaml` into a new directory, and run `kustomize build | kubectl apply -f -` to install Local Path Provisioner to a Talos Linux cluster.
There are three patches applied:

* change the default `/opt/local-path-provisioner` path to `/var/mnt/local-path-provisioner`
* make the `local-path` storage class the default storage class (optional)
* label the `local-path-storage` namespace as privileged to allow privileged pods to be scheduled there

To test the Local Path Provisioner, you can refer to the [Usage section of the official guide](https://github.com/rancher/local-path-provisioner?tab=readme-ov-file#usage), or use the minimal sketch below.
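
The sketch (the claim and pod names are arbitrary) creates a PVC against the `local-path` storage class and a pod that writes to it:

```yaml
# local-path-test.yaml (hypothetical example)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: local-path-test
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-path
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: Pod
metadata:
  name: local-path-test
spec:
  containers:
    - name: test
      image: alpine
      command: ["sh", "-c", "echo hello > /data/hello && sleep inf"]
      volumeMounts:
        - mountPath: /data
          name: data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: local-path-test
```

Apply it with `kubectl apply -f local-path-test.yaml`; the volume is provisioned once the pod is scheduled, since the storage class uses the `WaitForFirstConsumer` binding mode.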

You can check that directories for PVCs are created on the node's filesystem with the `talosctl ls /var/mnt/local-path-provisioner` command.

---
title: "Pod Security"
description: "Enabling Pod Security Admission plugin to configure Pod Security Standards."
aliases:
  - ../../guides/pod-security
---

Kubernetes deprecated [Pod Security Policy (PSP)](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) in version 1.21 and removed it entirely in 1.25.

It was replaced by [Pod Security Admission (PSA)](https://kubernetes.io/docs/concepts/security/pod-security-admission/), which is enabled by default starting with v1.23.

Talos Linux automatically enables and configures PSA to enforce Pod Security Standards.
These [Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) define three policies that cover the security spectrum:

* **Privileged**: Unrestricted policy, providing the widest possible level of permissions.
* **Baseline**: Minimally restrictive policy.
* **Restricted**: Heavily restricted policy.

By default, Talos, with the help of PSA, applies the `baseline` profile to all namespaces, except for the `kube-system` namespace, which uses the `privileged` profile.

## Default PSA Configuration

Here is the default PSA configuration on Talos:

```yaml
apiVersion: pod-security.admission.config.k8s.io/v1alpha1
kind: PodSecurityConfiguration
defaults:
  enforce: "baseline"
  enforce-version: "latest"
  audit: "restricted"
  audit-version: "latest"
  warn: "restricted"
  warn-version: "latest"
exemptions:
  usernames: []
  runtimeClasses: []
  namespaces: [kube-system]
```

This cluster-wide configuration:

* Enforces the `baseline` security profile by default.
* Warns if the `restricted` profile is violated, but does not enforce it.

## Modify the Default PSA Configuration

You can modify this PSA policy by updating the generated machine configuration before the cluster is created, or on the fly by using the `talosctl` CLI utility, as in the sketch below.
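
For example, a machine config patch that raises the default enforced profile to `restricted` might look like the following sketch (adjust the profiles and exemptions to your needs), applied with `talosctl patch machineconfig`:

```yaml
cluster:
  apiServer:
    admissionControl:
      - name: PodSecurity
        configuration:
          apiVersion: pod-security.admission.config.k8s.io/v1alpha1
          kind: PodSecurityConfiguration
          defaults:
            enforce: "restricted"
            enforce-version: "latest"
            audit: "restricted"
            audit-version: "latest"
            warn: "restricted"
            warn-version: "latest"
          exemptions:
            usernames: []
            runtimeClasses: []
            namespaces: [kube-system]
```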

Verify the current admission plugin configuration with:

```bash
$ talosctl get admissioncontrolconfigs.kubernetes.talos.dev admission-control -o yaml

node: 172.20.0.2
metadata:
    namespace: controlplane
    type: AdmissionControlConfigs.kubernetes.talos.dev
    id: admission-control
    version: 1
    owner: config.K8sControlPlaneController
    phase: running
    created: 2022-02-22T20:28:21Z
    updated: 2022-02-22T20:28:21Z
spec:
    config:
        - name: PodSecurity
          configuration:
            apiVersion: pod-security.admission.config.k8s.io/v1alpha1
            defaults:
                audit: restricted
                audit-version: latest
                enforce: baseline
                enforce-version: latest
                warn: restricted
                warn-version: latest
            exemptions:
                namespaces:
                    - kube-system
                runtimeClasses: []
                usernames: []
            kind: PodSecurityConfiguration
```

## Workloads That Satisfy the Different Security Profiles

To deploy a workload that satisfies both the `baseline` and `restricted` profiles, you must ensure that your workloads:

* Run as non-root users (UID 1000 or higher)
* Use read-only root filesystems where possible
* Minimize or eliminate kernel capabilities

To see how PSA treats workloads, consider these examples:

* A Deployment that satisfies the `restricted` profile
* A Deployment that meets `baseline` requirements but violates `restricted`
* A DaemonSet that violates both the `restricted` and `baseline` profiles

### Deployment that Satisfies the Restricted Profile

This Deployment complies with the `restricted` profile and does not produce any errors or warnings when applied:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-workload
  namespace: default
spec:
  selector:
    matchLabels:
      app: example-workload
  template:
    metadata:
      labels:
        app: example-workload
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: example-workload
          image: ghcr.io/siderolabs/example-workload
          imagePullPolicy: Always
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
            runAsNonRoot: true
            capabilities:
              drop:
                - ALL
```

When you apply this `example-workload` Deployment, it successfully creates the Deployment and deploys its pods:

<pre>
$ kubectl apply -f example-workload.yaml
deployment.apps/example-workload created

$ kubectl get pods
NAME                                READY   STATUS    RESTARTS   AGE
example-workload-6f847d64b9-jctkv   1/1     Running   0          10s
</pre>

This is because the Deployment follows Talos’ recommended security practices, which, as shown in the Deployment configuration, include:

* **runAsNonRoot: true**: Prevents the container from running as root.
* **runAsUser and runAsGroup**: Ensures a dedicated non-root user (UID/GID 1000) runs the process.
* **fsGroup**: Sets file system group ownership for shared volumes.
* **seccompProfile: RuntimeDefault**: Uses the default seccomp profile to restrict available system calls.
* **allowPrivilegeEscalation: false**: Blocks processes from gaining additional privileges.
* **capabilities: drop: [ALL]**: Removes unnecessary Linux capabilities.

### Deployment that Violates the Restricted but Meets Baseline Profile

Run the following command to create a Deployment that complies with the `baseline` profile but violates the `restricted` profile:

```bash
kubectl create deployment nginx --image=nginx
```

Applying this Deployment triggers warnings indicating excessive privileges:

<pre>
Warning: would violate PodSecurity "restricted:latest": allowPrivilegeEscalation != false (container "nginx" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (container "nginx" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or container "nginx" must set securityContext.runAsNonRoot=true), seccompProfile (pod or container "nginx" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
deployment.apps/nginx created
</pre>

Despite these warnings, the Deployment and its pods are still created successfully because it complies with the default Talos `baseline` security profile:

<pre>
$ kubectl get pods
NAME                     READY   STATUS    RESTARTS   AGE
nginx-85b98978db-j68l8   1/1     Running   0          2m3s
</pre>

### DaemonSet that Fails Both the Restricted and Baseline Profiles

This DaemonSet violates both the `baseline` and `restricted` profiles:

```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: debug-container
  name: debug-container
  namespace: default
spec:
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: debug-container
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: debug-container
    spec:
      containers:
        - args:
            - "360000"
          command:
            - /bin/sleep
          image: ubuntu:latest
          imagePullPolicy: IfNotPresent
          name: debug-container
          resources: {}
          securityContext:
            privileged: true
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
      dnsPolicy: ClusterFirstWithHostNet
      hostIPC: true
      hostPID: true
      hostNetwork: true
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
  updateStrategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate
```

When you apply this DaemonSet:

* A warning is shown, indicating that the DaemonSet requests too many privileges:

<pre>
Warning: would violate PodSecurity "restricted:latest": host namespaces (hostNetwork=true, hostPID=true, hostIPC=true), privileged (container "debug-container" must not set securityContext.privileged=true), allowPrivilegeEscalation != false (container "debug-container" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (container "debug-container" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or container "debug-container" must set securityContext.runAsNonRoot=true), seccompProfile (pod or container "debug-container" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
daemonset.apps/debug-container created
</pre>

* The DaemonSet object gets created but no pods are scheduled:

<pre>
$ kubectl get ds
NAME              DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR   AGE
debug-container   0         0         0       0            0           &lt;none&gt;          34s
</pre>

* When you describe the DaemonSet, the `Events` section shows that Pod Security Admission errors are blocking pod creation:

<pre>
$ kubectl describe ds debug-container
...
Warning  FailedCreate  92s  daemonset-controller  Error creating: pods "debug-container-kwzdj" is forbidden: violates PodSecurity "baseline:latest": host namespaces (hostNetwork=true, hostPID=true, hostIPC=true), privileged (container "debug-container" must not set securityContext.privileged=true)
</pre>

This happens because the DaemonSet does not comply with the enforced `baseline` Pod Security profile.

## Override the Pod Security Admission Configuration

You can override the Pod Security Admission configuration at the namespace level.

This is especially useful for applications like the Prometheus node exporter or storage solutions that require more relaxed Pod Security Standards.

Using the DaemonSet workload example, you can update the enforced policy to `privileged` for its namespace, which is the default namespace:

```bash
$ kubectl label ns default pod-security.kubernetes.io/enforce=privileged
namespace/default labeled
```

With this update, the DaemonSet pods are now scheduled successfully:

<pre>
$ kubectl get ds
NAME              DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR   AGE
debug-container   2         2         0       2            0           &lt;none&gt;          4s
</pre>

---
title: "Seccomp Profiles"
description: "Using custom Seccomp Profiles with Kubernetes workloads."
aliases:
  - ../../guides/pod-security
---

Seccomp stands for secure computing mode and has been a feature of the Linux kernel since version 2.6.12.
It can be used to sandbox the privileges of a process, restricting the calls it is able to make from userspace into the kernel.

Refer to the [Kubernetes Seccomp Guide](https://kubernetes.io/docs/tutorials/security/seccomp/) for more details.

In this guide we are going to configure a custom Seccomp Profile that logs all syscalls made by the workload.

## Preparing the nodes

Create a machine config patch with the contents below and save it as `patch.yaml`:

```yaml
machine:
  seccompProfiles:
    - name: audit.json
      value:
        defaultAction: SCMP_ACT_LOG
```

Apply the machine config to all the nodes using talosctl:

```bash
talosctl -e <endpoint ip/hostname> -n <node ip/hostname> patch mc -p @patch.yaml
```

This creates a seccomp profile named `audit.json` on the node at `/var/lib/kubelet/seccomp/profiles`.

The profiles can be used by Kubernetes pods by specifying the pod `securityContext` as below:

```yaml
spec:
  securityContext:
    seccompProfile:
      type: Localhost
      localhostProfile: profiles/audit.json
```

> Note that the `localhostProfile` uses the name of the profile created under the `profiles` directory.
> So make sure to use the path as `profiles/<profile-name.json>`.

This can be verified by running the following command:

```bash
talosctl -e <endpoint ip/hostname> -n <node ip/hostname> get seccompprofiles
```

An output similar to the below can be observed:

```text
NODE       NAMESPACE   TYPE             ID           VERSION
10.5.0.3   cri         SeccompProfile   audit.json   1
```

The content of the seccomp profile can be viewed by running the below command:

```bash
talosctl -e <endpoint ip/hostname> -n <node ip/hostname> read /var/lib/kubelet/seccomp/profiles/audit.json
```

An output similar to the below can be observed:

```text
{"defaultAction":"SCMP_ACT_LOG"}
```

## Create a Kubernetes workload that uses the custom Seccomp Profile

Here we'll be using an example workload from the Kubernetes [documentation](https://kubernetes.io/docs/tutorials/security/seccomp/).

First open up a second terminal and run the following talosctl command so that we can view the syscalls being logged in real time:

```bash
talosctl -e <endpoint ip/hostname> -n <node ip/hostname> dmesg --follow --tail
```

Now deploy the example workload from the Kubernetes documentation:

```bash
kubectl apply -f https://k8s.io/examples/pods/security/seccomp/ga/audit-pod.yaml
```

Once the pod starts running, the terminal running the `talosctl dmesg` command from above should show logs similar to the below:

```text
10.5.0.3: kern: info: [2022-07-28T11:49:42.489473063Z]: cni0: port 1(veth32488a86) entered blocking state
10.5.0.3: kern: info: [2022-07-28T11:49:42.490852063Z]: cni0: port 1(veth32488a86) entered disabled state
10.5.0.3: kern: info: [2022-07-28T11:49:42.492470063Z]: device veth32488a86 entered promiscuous mode
10.5.0.3: kern: info: [2022-07-28T11:49:42.503105063Z]: IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
10.5.0.3: kern: info: [2022-07-28T11:49:42.503944063Z]: IPv6: ADDRCONF(NETDEV_CHANGE): veth32488a86: link becomes ready
10.5.0.3: kern: info: [2022-07-28T11:49:42.504764063Z]: cni0: port 1(veth32488a86) entered blocking state
10.5.0.3: kern: info: [2022-07-28T11:49:42.505423063Z]: cni0: port 1(veth32488a86) entered forwarding state
10.5.0.3: kern: warning: [2022-07-28T11:49:44.873616063Z]: kauditd_printk_skb: 14 callbacks suppressed
10.5.0.3: kern: notice: [2022-07-28T11:49:44.873619063Z]: audit: type=1326 audit(1659008985.445:25): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=3 compat=0 ip=0x55ec0657bd3b code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.876609063Z]: audit: type=1326 audit(1659008985.445:26): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=3 compat=0 ip=0x55ec0657bd3b code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.878789063Z]: audit: type=1326 audit(1659008985.449:27): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=257 compat=0 ip=0x55ec0657bdaa code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.886693063Z]: audit: type=1326 audit(1659008985.461:28): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=202 compat=0 ip=0x55ec06532b43 code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.888764063Z]: audit: type=1326 audit(1659008985.461:29): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=202 compat=0 ip=0x55ec06532b43 code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.891009063Z]: audit: type=1326 audit(1659008985.461:30): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=1 compat=0 ip=0x55ec0657bd3b code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.893162063Z]: audit: type=1326 audit(1659008985.461:31): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=3 compat=0 ip=0x55ec0657bd3b code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.895365063Z]: audit: type=1326 audit(1659008985.461:32): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=39 compat=0 ip=0x55ec066eb68b code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.898306063Z]: audit: type=1326 audit(1659008985.461:33): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="runc:[2:INIT]" exe="/" sig=0 arch=c000003e syscall=59 compat=0 ip=0x55ec0657be16 code=0x7ffc0000
10.5.0.3: kern: notice: [2022-07-28T11:49:44.901518063Z]: audit: type=1326 audit(1659008985.473:34): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=2784 comm="http-echo" exe="/http-echo" sig=0 arch=c000003e syscall=158 compat=0 ip=0x455f35 code=0x7ffc0000
```

## Cleanup

You can clean up the test resources by running the following command:

```bash
kubectl delete pod audit-pod
```

---
title: "Storage"
description: "Setting up storage for a Kubernetes cluster"
aliases:
  - ../../guides/storage
---

In Kubernetes, using storage in the right way is well-facilitated by the API.
However, unless you are running in a major public cloud, that API may not be hooked up to anything.
There are a _lot_ of options out there, and it can be fairly bewildering.

For Talos, we have some recommendations to make the decision easier.

## Public Cloud

If you are running on a major public cloud, use their block storage.
It is easy and automatic.

## Storage Clusters

> **Sidero Labs** recommends having separate disks (separate from the Talos install disk) dedicated for storage.

Redundancy, scaling capabilities, reliability, speed, maintenance load, and ease of use are all factors you must consider when managing your own storage.

Running a storage cluster can be a very good choice when managing your own storage.
The following projects are known to work with Talos Linux and provide good options, depending on your situation.

**Mayastor**: Ultra-low latency and high-performance workloads.

**Longhorn**: Simple, reliable, easy-to-use Kubernetes storage with easy replication and snapshots.

**Rook/Ceph**: Enterprise-scale, distributed, multi-tenant storage (block, file, and object storage).

Also, if you need _both_ mount-once _and_ mount-many capabilities, Ceph is your answer.

> Please note that _most_ people should not use mount-many semantics.
> NFS is pervasive because it is old and easy, _not_ because it is a good idea.
> There are all manner of locking, performance, change control, and reliability concerns inherent in _any_ mount-many situation, so we **strongly** recommend you avoid this method.

### Longhorn

Documentation for installing Longhorn on Talos Linux is available on the [Longhorn site](https://longhorn.io/docs/1.9.0/advanced-resources/os-distro-specific/talos-linux-support/).

### Rook/Ceph

[Ceph](https://ceph.io) is a mature open source storage system that can provide almost any type of storage.
It scales well, and enables the operator to easily add and remove storage with no downtime.
It comes bundled with an S3-compatible object store; CephFS, an NFS-like clustered filesystem; and RBD, a block storage system.

With the help of [Rook](https://rook.io), the vast majority of the complexity of Ceph is hidden away, allowing you to control almost everything about your Ceph cluster from fairly simple Kubernetes CRDs.

However, Ceph can be rather slow for small clusters.
It relies heavily on CPUs and massive parallelization for performance.
If your cluster is small, just running Ceph may eat up a significant amount of the resources you have available.

Troubleshooting Ceph can be difficult if you do not understand its architecture.
There are very good tools for inspection and debugging, but this is still frequently seen as a concern.

### OpenEBS Mayastor replicated storage

[Mayastor](https://github.com/openebs/Mayastor) is an OpenEBS project built in Rust utilizing the modern NVMe-oF system.

#### Deploy Mayastor

Mayastor has documentation specific to installing on Talos Linux in the official [documentation](https://openebs.io/docs/Solutioning/openebs-on-kubernetes-platforms/talos).

Installing on Talos Linux requires patching the Pod Security policies, enabling Huge Page support, and adding labels.
This is all covered in the Mayastor documentation.

We need to disable the init container that checks for the `nvme_tcp` module, since Talos has that module built in.

Create a helm values file `mayastor-values.yaml` with the following contents:

```yaml
mayastor:
  csi:
    node:
      initContainers:
        enabled: false
```

If you do not need to use the LVM and ZFS engines, they can be disabled in the values file:

```yaml
engines:
  local:
    lvm:
      enabled: false
    zfs:
      enabled: false
```

Continue setting up [Mayastor](https://openebs.io/docs/quickstart-guide/installation#installation-via-helm) using the official documentation, passing the values file.

Follow the Post-Installation steps from the official [documentation](https://openebs.io/docs/quickstart-guide/installation#post-installation-considerations) to use Local Storage or Replicated Storage.

### Piraeus / LINSTOR

* [Piraeus-Operator](https://piraeus.io/)
* [LINSTOR](https://linbit.com/drbd/)
* [DRBD Extension](https://github.com/siderolabs/extensions#storage)

#### Install Piraeus Operator V2

There is already a how-to for Talos: [Link](https://piraeus.io/docs/stable/how-to/talos/)

#### Create first storage pool and PVC

Before proceeding, install the LINSTOR plugin for kubectl:
https://github.com/piraeusdatastore/kubectl-linstor

Or use [krew](https://krew.sigs.k8s.io/): `kubectl krew install linstor`

```sh
# Create a device pool on a blank (no partition table!) disk on node01
kubectl linstor physical-storage create-device-pool --pool-name nvme_lvm_pool LVM node01 /dev/nvme0n1 --storage-pool nvme_pool
```

Create a storage class definition `piraeus-sc.yml`:

```yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: simple-nvme
parameters:
  csi.storage.k8s.io/fstype: xfs
  linstor.csi.linbit.com/autoPlace: "3"
  linstor.csi.linbit.com/storagePool: nvme_pool
provisioner: linstor.csi.linbit.com
volumeBindingMode: WaitForFirstConsumer
```

```sh
# Create the storage class
kubectl apply -f piraeus-sc.yml
```

## NFS

NFS is slow, has all kinds of bottlenecks involving contention, distributed locking, single points of service, and more.
However, it is supported by a wide variety of systems, such as NetApp storage arrays.

The NFS client is part of the [`kubelet` image](https://github.com/talos-systems/kubelet) maintained by the Talos team.
This means that the version installed in your running `kubelet` is the version of NFS supported by Talos.
You can reduce some of the contention problems by parceling Persistent Volumes from separate underlying directories, as in the sketch below.
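
As an illustrative sketch (the server name and export paths are placeholders), two statically defined NFS PersistentVolumes backed by separate exports might look like this:

```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-data-a
spec:
  capacity:
    storage: 100Gi
  accessModes:
    - ReadWriteMany
  nfs:
    server: nas.example.com # placeholder
    path: /export/data-a # a separate export per volume
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-data-b
spec:
  capacity:
    storage: 100Gi
  accessModes:
    - ReadWriteMany
  nfs:
    server: nas.example.com # placeholder
    path: /export/data-b
```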

## Object storage

Ceph comes with an S3-compatible object store, but there are other options, as well.
These can often be built on top of other storage backends.
For instance, you may have your block storage running with Mayastor but assign a Pod a large Persistent Volume to serve your object store.

One of the most popular open source add-on object stores is [MinIO](https://min.io/).

## Others (iSCSI)

The most common remaining systems involve iSCSI in one form or another.
iSCSI in Linux is facilitated by [open-iscsi](https://github.com/open-iscsi/open-iscsi).

iSCSI support in Talos is provided by the [iscsi-tools](https://github.com/siderolabs/extensions/pkgs/container/iscsi-tools) [system extension]({{< relref "../../talos-guides/configuration/system-extensions" >}}).

---
title: "iSCSI Storage with Synology CSI"
description: "Automatically provision iSCSI volumes on a Synology NAS with the synology-csi driver."
aliases:
  - ../../guides/synology-csi
---

## Background

Synology is a company that specializes in Network Attached Storage (NAS) devices.
They provide a number of features within a simple web OS, including an LDAP server, Docker support, and (perhaps most relevant to this guide) the ability to function as an iSCSI host.
The focus of this guide is to allow a Kubernetes cluster running on Talos to provision Kubernetes storage (both dynamic and static) on a Synology NAS using a direct integration, rather than relying on an intermediary layer like Rook/Ceph or Mayastor.

This guide assumes a very basic familiarity with iSCSI terminology (LUN, iSCSI target, etc.).

## Prerequisites

* Synology NAS running DSM 7.0 or above
* Provisioned Talos cluster running Kubernetes v1.20 or above with the `siderolabs/iscsi-tools` extension installed
* (Optional) Both [Volume Snapshot CRDs](https://github.com/kubernetes-csi/external-snapshotter/tree/v4.0.0/client/config/crd) and the [common snapshot controller](https://github.com/kubernetes-csi/external-snapshotter/tree/v4.0.0/deploy/kubernetes/snapshot-controller) must be installed in your Kubernetes cluster if you want to use the **Snapshot** feature

## Setting up the Synology user account

The `synology-csi` controller interacts with your NAS in two different ways: via the API and via the iSCSI protocol.
Actions such as creating a new iSCSI target or deleting an old one are accomplished via the Synology API, and require administrator access.
On the other hand, mounting the disk to a pod and reading from / writing to it will utilize iSCSI.
Because you can only authenticate with one account per DSM configured, that account needs to have admin privileges.
In order to minimize access in the case of these credentials being compromised, you should configure the account with the least possible amount of access – explicitly specify "No Access" on all volumes when configuring the user permissions.

## Setting up the Synology CSI

> Note: this guide is paraphrased from the Synology CSI [readme](https://github.com/zebernst/synology-csi-talos).
> Please consult the readme for more in-depth instructions and explanations.

Clone the git repository:

```bash
git clone https://github.com/zebernst/synology-csi-talos.git
```

While Synology provides some automated scripts to deploy the CSI driver, they can be finicky, especially when making changes to the source code.
We will be configuring and deploying things manually in this guide.

The relevant files we will be touching are in the following locations:

```text
.
├── Dockerfile
├── Makefile
├── config
│   └── client-info-template.yml
└── deploy
    └── kubernetes
        └── v1.20
            ├── controller.yml
            ├── csi-driver.yml
            ├── namespace.yml
            ├── node.yml
            ├── snapshotter
            │   ├── snapshotter.yaml
            │   └── volume-snapshot-class.yml
            └── storage-class.yml
```

### Configure connection info

Use `config/client-info-template.yml` as an example to configure the connection information for DSM.
You can specify **one or more** storage systems on which the CSI volumes will be created.
See below for an example:

```yaml
---
clients:
  - host: 192.168.1.1   # ipv4 address or domain of the DSM
    port: 5000          # port for connecting to the DSM
    https: false        # set this to true to use HTTPS; set the port to the DSM HTTPS port as well
    username: username  # username
    password: password  # password
```

Create a Kubernetes secret using the client information config file:

```bash
kubectl create secret -n synology-csi generic client-info-secret --from-file=config/client-info.yml
```

Note that if you rename the secret to something other than `client-info-secret`, make sure you update the corresponding references in the deployment manifests as well.

### Build the Talos-compatible image

Modify the `Makefile` so that the image is built and tagged under your GitHub Container Registry username:

```makefile
REGISTRY_NAME=ghcr.io/<username>
```

When you run `make docker-build` or `make docker-build-multiarch`, it will push the resulting image to `ghcr.io/<username>/synology-csi:v1.1.0`.
Ensure that you find and change any reference to `synology/synology-csi:v1.1.0` to point to your newly-pushed image within the deployment manifests.

### Configure the CSI driver

By default, the deployment manifests include one storage class and one volume snapshot class.
See below for examples:

```yaml
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  annotations:
    storageclass.kubernetes.io/is-default-class: "false"
  name: syno-storage
provisioner: csi.san.synology.com
parameters:
  fsType: 'ext4'
  dsm: '192.168.1.1'
  location: '/volume1'
reclaimPolicy: Retain
allowVolumeExpansion: true
---
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: syno-snapshot
  annotations:
    storageclass.kubernetes.io/is-default-class: "false"
driver: csi.san.synology.com
deletionPolicy: Delete
parameters:
  description: 'Kubernetes CSI'
```

It can be useful to configure multiple different StorageClasses.
For example, a popular strategy is to create two nearly identical StorageClasses, with one configured with `reclaimPolicy: Retain` and the other with `reclaimPolicy: Delete`.
Alternately, a workload may require a specific filesystem, such as `ext4`.
If a Synology NAS is going to be the most common way to configure storage on your cluster, it can be convenient to add the `storageclass.kubernetes.io/is-default-class: "true"` annotation to one of your StorageClasses.

The following table details the configurable parameters for the Synology StorageClass.

| Name | Type | Description | Default | Supported protocols |
| ------------------------------------------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | ------------------- |
| *dsm* | string | The IPv4 address of your DSM, which must be included in the `client-info.yml` for the CSI driver to log in to DSM | - | iSCSI, SMB |
| *location* | string | The location (/volume1, /volume2, ...) on DSM where the LUN for *PersistentVolume* will be created | - | iSCSI, SMB |
| *fsType* | string | The formatting file system of the *PersistentVolumes* when you mount them on the pods. This parameter only works with iSCSI. For SMB, the fsType is always ‘cifs‘. | `ext4` | iSCSI |
| *protocol* | string | The backing storage protocol. Enter ‘iscsi’ to create LUNs or ‘smb‘ to create shared folders on DSM. | `iscsi` | iSCSI, SMB |
| *csi.storage.k8s.io/node-stage-secret-name* | string | The name of node-stage-secret. Required if DSM shared folder is accessed via SMB. | - | SMB |
| *csi.storage.k8s.io/node-stage-secret-namespace* | string | The namespace of node-stage-secret. Required if DSM shared folder is accessed via SMB. | - | SMB |

The VolumeSnapshotClass can be similarly configured with the following parameters:

| Name | Type | Description | Default | Supported protocols |
| ------------- | ------ | -------------------------------------------- | ------- | ------------------- |
| *description* | string | The description of the snapshot on DSM | - | iSCSI |
| *is_locked* | string | Whether you want to lock the snapshot on DSM | `false` | iSCSI, SMB |

### Apply YAML manifests

Once you have created the desired StorageClass(es) and VolumeSnapshotClass(es), the final step is to apply the Kubernetes manifests against the cluster.
The easiest way to apply them all at once is to create a `kustomization.yaml` file in the same directory as the manifests and use Kustomize to apply:

```bash
kubectl apply -k path/to/manifest/directory
```

Alternately, you can apply each manifest one-by-one:

```bash
kubectl apply -f <file>
```

## Run performance tests

In order to test the provisioning, mounting, and performance of using a Synology NAS as Kubernetes persistent storage, use the following command:

```bash
kubectl apply -f speedtest.yaml
```

Content of `speedtest.yaml` ([source](https://github.com/phnmnl/k8s-volume-test)):

```yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
spec:
#  storageClassName: syno-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 5G
---
apiVersion: batch/v1
kind: Job
metadata:
  name: read
spec:
  template:
    metadata:
      name: read
      labels:
        app: speedtest
        job: read
    spec:
      containers:
        - name: read
          image: ubuntu:xenial
          command: ["dd","if=/mnt/pv/test.img","of=/dev/null","bs=8k"]
          volumeMounts:
            - mountPath: "/mnt/pv"
              name: test-volume
      volumes:
        - name: test-volume
          persistentVolumeClaim:
            claimName: test-claim
      restartPolicy: Never
---
apiVersion: batch/v1
kind: Job
metadata:
  name: write
spec:
  template:
    metadata:
      name: write
      labels:
        app: speedtest
        job: write
    spec:
      containers:
        - name: write
          image: ubuntu:xenial
          command: ["dd","if=/dev/zero","of=/mnt/pv/test.img","bs=1G","count=1","oflag=dsync"]
          volumeMounts:
            - mountPath: "/mnt/pv"
              name: test-volume
      volumes:
        - name: test-volume
          persistentVolumeClaim:
            claimName: test-claim
      restartPolicy: Never
```

If these two jobs complete successfully, use the following commands to get the results of the speed tests:

```bash
# Pod logs for read test:
kubectl logs -l app=speedtest,job=read

# Pod logs for write test:
kubectl logs -l app=speedtest,job=write
```

When you're satisfied with the results of the test, delete the artifacts created from the speedtest:

```bash
kubectl delete -f speedtest.yaml
```

---
title: "User Namespaces"
description: "Guide on how to configure Talos Cluster to support User Namespaces"
---

User Namespaces are a feature of the Linux kernel that allows unprivileged users to have their own range of UIDs and GIDs, without needing to be root.

Refer to the [official documentation](https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/) for more information on User Namespaces.

## Enabling User Namespaces

To enable User Namespaces in Talos, you need to add the following configuration to the Talos machine configuration:

```yaml
---
cluster:
  apiServer:
    extraArgs:
      feature-gates: UserNamespacesSupport=true,UserNamespacesPodSecurityStandards=true
machine:
  sysctls:
    user.max_user_namespaces: "11255"
  kubelet:
    extraConfig:
      featureGates:
        UserNamespacesSupport: true
        UserNamespacesPodSecurityStandards: true
```

After applying the configuration, refer to the [official documentation](https://kubernetes.io/docs/tasks/configure-pod-container/user-namespaces/) to configure workloads to use User Namespaces, as in the sketch below.
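
As a minimal sketch (the pod name is arbitrary), a workload opts in to user namespaces by setting `hostUsers: false` in its pod spec:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: userns-test
spec:
  hostUsers: false # run this pod in a user namespace
  containers:
    - name: test
      image: alpine
      command: ["sleep", "inf"]
```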

---
title: "Network"
weight: 20
description: "Managing the Kubernetes cluster networking"
---
---
|
||||
title: "Deploying Cilium CNI"
|
||||
description: "In this guide you will learn how to set up Cilium CNI on Talos."
|
||||
aliases:
|
||||
- ../../guides/deploying-cilium
|
||||
---
|
||||
|
||||
> Cilium can be installed either via the `cilium` cli or using `helm`.
|
||||
|
||||
This documentation will outline installing Cilium CNI v1.18.0 on Talos in six different ways.
|
||||
Adhering to Talos principles we'll deploy Cilium with IPAM mode set to Kubernetes, and using the `cgroupv2` and `bpffs` mount that talos already provides.
|
||||
As Talos does not allow loading kernel modules by Kubernetes workloads, `SYS_MODULE` capability needs to be dropped from the Cilium default set of values, this override can be seen in the helm/cilium cli install commands.
|
||||
Each method can either install Cilium using kube proxy (default) or without: [Kubernetes Without kube-proxy](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/)
|
||||
|
||||
In this guide we assume that [KubePrism]({{< relref "../configuration/kubeprism" >}}) is enabled and configured to use the port 7445.
|
||||
|
||||
## Machine config preparation
|
||||
|
||||
When generating the machine config for a node set the CNI to none.
|
||||
For example using a config patch:
|
||||
|
||||
Create a `patch.yaml` file with the following contents:
|
||||
|
||||
```yaml
|
||||
cluster:
|
||||
network:
|
||||
cni:
|
||||
name: none
|
||||
```
|
||||
|
||||
```bash
|
||||
talosctl gen config \
|
||||
my-cluster https://mycluster.local:6443 \
|
||||
--config-patch @patch.yaml
|
||||
```
|
||||
|
||||
Or if you want to deploy Cilium without kube-proxy, you also need to disable kube proxy:
|
||||
|
||||
Create a `patch.yaml` file with the following contents:
|
||||
|
||||
```yaml
|
||||
cluster:
|
||||
network:
|
||||
cni:
|
||||
name: none
|
||||
proxy:
|
||||
disabled: true
|
||||
```
|
||||
|
||||
```bash
|
||||
talosctl gen config \
|
||||
my-cluster https://mycluster.local:6443 \
|
||||
--config-patch @patch.yaml
|
||||
```
|
||||
|
||||
### Installation using Cilium CLI
|
||||
|
||||
> Note: It is recommended to template the cilium manifest using helm and use it as part of Talos machine config, but if you want to install Cilium using the Cilium CLI, you can follow the steps below.
|
||||
|
||||
Install the [Cilium CLI](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/#install-the-cilium-cli) following the steps here.
|
||||
|
||||
#### With kube-proxy
|
||||
|
||||
```bash
|
||||
cilium install \
|
||||
--set ipam.mode=kubernetes \
|
||||
--set kubeProxyReplacement=false \
|
||||
--set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
|
||||
--set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
|
||||
--set cgroup.autoMount.enabled=false \
|
||||
--set cgroup.hostRoot=/sys/fs/cgroup
|
||||
```
|
||||
|
||||
#### Without kube-proxy
|
||||
|
||||
```bash
|
||||
cilium install \
|
||||
--set ipam.mode=kubernetes \
|
||||
--set kubeProxyReplacement=true \
|
||||
--set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
|
||||
--set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
|
||||
--set cgroup.autoMount.enabled=false \
|
||||
--set cgroup.hostRoot=/sys/fs/cgroup \
|
||||
--set k8sServiceHost=localhost \
|
||||
--set k8sServicePort=7445
|
||||
```
|
||||
|
||||
Or if you want to deploy Cilium with support for Gateway API (requires installing cilium without kube-proxy), install [Gateway API CRDs](https://docs.cilium.io/en/stable/network/servicemesh/gateway-api/gateway-api/#prerequisites) and set some extra parameters:
|
||||
|
||||
```bash
|
||||
cilium install \
|
||||
--set ipam.mode=kubernetes \
|
||||
--set kubeProxyReplacement=true \
|
||||
--set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
|
||||
--set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
|
||||
--set cgroup.autoMount.enabled=false \
|
||||
--set cgroup.hostRoot=/sys/fs/cgroup \
|
||||
--set k8sServiceHost=localhost \
|
||||
--set k8sServicePort=7445 \
|
||||
--set gatewayAPI.enabled=true \
|
||||
--set gatewayAPI.enableAlpn=true \
|
||||
--set gatewayAPI.enableAppProtocol=true
|
||||
```
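
If you prefer to fetch the Gateway API CRDs directly with `kubectl`, a minimal sketch follows; the release and channel below are placeholders, so check the Cilium prerequisites linked above for the exact version your Cilium release expects:

```bash
# Placeholder release/channel -- consult the Cilium Gateway API
# prerequisites for the version matching your Cilium release.
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/experimental-install.yaml
```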

> Note: If you plan to use gRPC and GRPCRoutes with TLS, you must enable ALPN by setting `gatewayAPI.enableAlpn=true`.
> Since gRPC relies on HTTP/2, ALPN is required to negotiate HTTP/2 support between the client and server.

### Installation using Helm

Refer to [Installing with Helm](https://docs.cilium.io/en/stable/installation/k8s-install-helm/) for more information.

First we'll need to add the Helm repo for Cilium.

```bash
helm repo add cilium https://helm.cilium.io/
helm repo update
```

### Method 1: Helm install

After applying the machine config and bootstrapping, Talos will appear to hang on phase 18/19 with the message `retrying error: node not ready`.
This happens because nodes in Kubernetes are only marked as ready once the CNI is up.
As there is no CNI defined, the boot process is pending and will reboot the node to retry after 10 minutes; this is expected behavior.

During this window you can install Cilium manually by running the following:

```bash
helm install \
  cilium \
  cilium/cilium \
  --version 1.18.0 \
  --namespace kube-system \
  --set ipam.mode=kubernetes \
  --set kubeProxyReplacement=false \
  --set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
  --set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
  --set cgroup.autoMount.enabled=false \
  --set cgroup.hostRoot=/sys/fs/cgroup
```

Or if you want to deploy Cilium without kube-proxy, also set some extra parameters:

```bash
helm install \
  cilium \
  cilium/cilium \
  --version 1.18.0 \
  --namespace kube-system \
  --set ipam.mode=kubernetes \
  --set kubeProxyReplacement=true \
  --set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
  --set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
  --set cgroup.autoMount.enabled=false \
  --set cgroup.hostRoot=/sys/fs/cgroup \
  --set k8sServiceHost=localhost \
  --set k8sServicePort=7445
```

And with Gateway API support:

```bash
...
  --set=gatewayAPI.enabled=true \
  --set=gatewayAPI.enableAlpn=true \
  --set=gatewayAPI.enableAppProtocol=true
```

After Cilium is installed the boot process should continue and complete successfully.
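
To verify that the agent rolled out, the following checks can be used (the Cilium CLI `status` command is optional; the `kubectl` check alone is sufficient):

```bash
# Wait for the Cilium agent DaemonSet to become ready.
kubectl -n kube-system rollout status daemonset/cilium

# Optionally, if the Cilium CLI is installed:
cilium status --wait
```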

### Method 2: Helm manifests install

Instead of directly installing Cilium you can instead first generate the manifest and then apply it:

```bash
helm template \
  cilium \
  cilium/cilium \
  --version 1.18.0 \
  --namespace kube-system \
  --set ipam.mode=kubernetes \
  --set kubeProxyReplacement=false \
  --set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
  --set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
  --set cgroup.autoMount.enabled=false \
  --set cgroup.hostRoot=/sys/fs/cgroup > cilium.yaml

kubectl apply -f cilium.yaml
```

Without kube-proxy:

```bash
helm template \
  cilium \
  cilium/cilium \
  --version 1.18.0 \
  --namespace kube-system \
  --set ipam.mode=kubernetes \
  --set kubeProxyReplacement=true \
  --set securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
  --set securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
  --set cgroup.autoMount.enabled=false \
  --set cgroup.hostRoot=/sys/fs/cgroup \
  --set k8sServiceHost=localhost \
  --set k8sServicePort=7445 > cilium.yaml

kubectl apply -f cilium.yaml
```

### Method 3: Helm manifests hosted install

After generating `cilium.yaml` using `helm template`, instead of applying this manifest directly during the Talos boot window (before the reboot timeout), you can host this file somewhere and patch the machine config to apply the manifest automatically during bootstrap.
To do this, patch your machine configuration to include this config instead of the above:

Create a `patch.yaml` file with the following contents:

```yaml
cluster:
  network:
    cni:
      name: custom
      urls:
        - https://server.yourdomain.tld/some/path/cilium.yaml
```

```bash
talosctl gen config \
  my-cluster https://mycluster.local:6443 \
  --config-patch @patch.yaml
```

However, beware of the fact that the Helm-generated Cilium manifest contains sensitive key material.
As such you should definitely not host it anywhere publicly accessible.

### Method 4: Helm manifests inline install

A more secure option is to include the `helm template` output manifest inside the machine configuration.
The machine config should be generated with CNI set to `none`.

Create a `patch.yaml` file with the following contents:

```yaml
cluster:
  network:
    cni:
      name: none
```

```bash
talosctl gen config \
  my-cluster https://mycluster.local:6443 \
  --config-patch @patch.yaml
```

If deploying Cilium with `kube-proxy` disabled, you can also include the following:

Create a `patch.yaml` file with the following contents:

```yaml
cluster:
  network:
    cni:
      name: none
  proxy:
    disabled: true
```

```bash
talosctl gen config \
  my-cluster https://mycluster.local:6443 \
  --config-patch @patch.yaml
```

To include the rendered manifest, patch this into your machine configuration:

```yaml
cluster:
  inlineManifests:
    - name: cilium
      contents: |
        ---
        # Source: cilium/templates/cilium-agent/serviceaccount.yaml
        apiVersion: v1
        kind: ServiceAccount
        metadata:
          name: "cilium"
          namespace: kube-system
        ---
        # Source: cilium/templates/cilium-operator/serviceaccount.yaml
        apiVersion: v1
        kind: ServiceAccount
        -> Your cilium.yaml file will be pretty long....
```

This will install the Cilium manifests at just the right time during bootstrap.

Beware though:

- Changing the namespace when templating with Helm does not generate a manifest containing the YAML to create that namespace.
  As the inline manifest is processed from top to bottom, make sure to manually put the namespace YAML at the start of the inline manifest.
- Only add the Cilium inline manifest to the control plane nodes' machine configuration.
- Make sure all control plane nodes have an identical configuration.
- If you delete any of the generated resources they will be restored whenever a control plane node reboots.
- As a safety measure, Talos only creates missing resources from inline manifests; it never deletes or updates anything.
- If you need to update a manifest, make sure to first edit all control plane machine configurations and then run `talosctl upgrade-k8s`, as it will take care of updating inline manifests.

### Method 5: Using a job

We can utilize a job pattern to run arbitrary logic during bootstrap time.
We can leverage this to our advantage to install Cilium by using an inline manifest as shown in the example below:

```yaml
cluster:
  inlineManifests:
    - name: cilium-install
      contents: |
        ---
        apiVersion: rbac.authorization.k8s.io/v1
        kind: ClusterRoleBinding
        metadata:
          name: cilium-install
        roleRef:
          apiGroup: rbac.authorization.k8s.io
          kind: ClusterRole
          name: cluster-admin
        subjects:
          - kind: ServiceAccount
            name: cilium-install
            namespace: kube-system
        ---
        apiVersion: v1
        kind: ServiceAccount
        metadata:
          name: cilium-install
          namespace: kube-system
        ---
        apiVersion: batch/v1
        kind: Job
        metadata:
          name: cilium-install
          namespace: kube-system
        spec:
          backoffLimit: 10
          template:
            metadata:
              labels:
                app: cilium-install
            spec:
              restartPolicy: OnFailure
              tolerations:
                - operator: Exists
                - effect: NoSchedule
                  operator: Exists
                - effect: NoExecute
                  operator: Exists
                - effect: PreferNoSchedule
                  operator: Exists
                - key: node-role.kubernetes.io/control-plane
                  operator: Exists
                  effect: NoSchedule
                - key: node-role.kubernetes.io/control-plane
                  operator: Exists
                  effect: NoExecute
                - key: node-role.kubernetes.io/control-plane
                  operator: Exists
                  effect: PreferNoSchedule
              affinity:
                nodeAffinity:
                  requiredDuringSchedulingIgnoredDuringExecution:
                    nodeSelectorTerms:
                      - matchExpressions:
                          - key: node-role.kubernetes.io/control-plane
                            operator: Exists
              serviceAccount: cilium-install
              serviceAccountName: cilium-install
              hostNetwork: true
              containers:
                - name: cilium-install
                  image: quay.io/cilium/cilium-cli:latest
                  env:
                    - name: KUBERNETES_SERVICE_HOST
                      valueFrom:
                        fieldRef:
                          apiVersion: v1
                          fieldPath: status.podIP
                    - name: KUBERNETES_SERVICE_PORT
                      value: "6443"
                  command:
                    - cilium
                    - install
                    - --set
                    - ipam.mode=kubernetes
                    - --set
                    - kubeProxyReplacement=true
                    - --set
                    - securityContext.capabilities.ciliumAgent={CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}
                    - --set
                    - securityContext.capabilities.cleanCiliumState={NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}
                    - --set
                    - cgroup.autoMount.enabled=false
                    - --set
                    - cgroup.hostRoot=/sys/fs/cgroup
                    - --set
                    - k8sServiceHost=localhost
                    - --set
                    - k8sServicePort=7445
```

Because there is no CNI present at installation time, `kubernetes.default.svc` cannot be used to install Cilium.
To overcome this limitation we utilize the host network connection to connect back to the node itself with `hostNetwork: true`, in tandem with the environment variables `KUBERNETES_SERVICE_PORT` and `KUBERNETES_SERVICE_HOST`.

The job runs a container to install Cilium to your liking; after the job is finished, Cilium can be managed/operated like usual.
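
To confirm that the install job ran to completion before the reboot window closes, you can wait on its `complete` condition:

```bash
kubectl -n kube-system wait --for=condition=complete --timeout=10m job/cilium-install
```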

The above can be combined with, or exchanged for, Method 3, for example, to host arbitrary configurations externally but render/run them at bootstrap time.

## Known issues

- There are some gotchas when using Talos and Cilium on the Google Cloud Platform when using internal load balancers.
  For more details: [GCP ILB support / support scope local routes to be configured](https://github.com/siderolabs/talos/issues/4109)

- When using the Talos `forwardKubeDNSToHost=true` option (which is enabled by default) in combination with the Cilium `bpf.masquerade=true` option, there is a known issue that causes `CoreDNS` to not work correctly.
  As a workaround, configuring `forwardKubeDNSToHost=false` resolves the issue.
  For more details see [the discussion here](https://github.com/siderolabs/talos/pull/9200)

## Other things to know

- After installing Cilium, `cilium connectivity test` might hang and/or fail with errors similar to

  ```text
  Error creating: pods "client-69748f45d8-9b9jg" is forbidden: violates PodSecurity "baseline:latest": non-default capabilities (container "client" must not include "NET_RAW" in securityContext.capabilities.add)
  ```

  This is expected; you can work around it by adding the `pod-security.kubernetes.io/enforce=privileged` [label on the namespace level]({{< relref "../configuration/pod-security">}}).

- Talos has full kernel module support for eBPF; see:
  - [Cilium System Requirements](https://docs.cilium.io/en/stable/operations/system_requirements/)
  - [Talos Kernel Config AMD64](https://github.com/siderolabs/pkgs/blob/main/kernel/build/config-amd64)
  - [Talos Kernel Config ARM64](https://github.com/siderolabs/pkgs/blob/main/kernel/build/config-arm64)
200
website/content/v1.12/kubernetes-guides/network/multus.md
Normal file
200
website/content/v1.12/kubernetes-guides/network/multus.md
Normal file
@ -0,0 +1,200 @@

---
title: "Multus CNI"
description: "A brief instruction on how to use Multus on Talos Linux"
---

[Multus CNI](https://github.com/k8snetworkplumbingwg/multus-cni) is a container network interface (CNI) plugin for Kubernetes that enables attaching multiple network interfaces to pods.
Typically, in Kubernetes each pod only has one network interface (apart from a loopback) -- with Multus you can create a multi-homed pod that has multiple interfaces.
This is accomplished by Multus acting as a "meta-plugin", a CNI plugin that can call multiple other CNI plugins.

## Installation

Multus can be deployed by simply applying the `thick` `DaemonSet` with `kubectl`.

```bash
kubectl apply -f https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/master/deployments/multus-daemonset-thick.yml
```

This will create a `DaemonSet` and a CRD: `NetworkAttachmentDefinition`.
This can be used to specify your network configuration.
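
A quick way to check that the deployment landed (resource names as created by the manifest above):

```bash
# The Multus DaemonSet should roll out on every node.
kubectl -n kube-system rollout status daemonset/kube-multus-ds

# The CRD should be registered.
kubectl get crd network-attachment-definitions.k8s.cni.cncf.io
```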

## Configuration

### Patching the `DaemonSet`

For Multus to work properly with Talos, a change needs to be made to the `DaemonSet`.
Instead of mounting the volume called `host-run-netns` on `/run/netns`, it has to be mounted on `/var/run/netns`.

Edit the `DaemonSet` and change the volume `host-run-netns` from `/run/netns` to `/var/run/netns`:

```yaml
...
- name: host-run-netns
  hostPath:
    path: /var/run/netns/
```

Failing to do so will leave your cluster crippled.
Running pods will remain running but new pods and deployments will give you the following error in the events:

```text
  Normal   Scheduled               3s    default-scheduler  Successfully assigned virtualmachines/samplepod to virt2
  Warning  FailedCreatePodSandBox  3s    kubelet            Failed to create pod sandbox: rpc error: code = Unknown desc = failed to setup network for sandbox "3a6a58386dfbf2471a6f86bd41e4e9a32aac54ccccd1943742cb67d1e9c58b5b": plugin type="multus-shim" name="multus-cni-network" failed (add): CmdAdd (shim): CNI request failed with status 400: 'ContainerID:"3a6a58386dfbf2471a6f86bd41e4e9a32aac54ccccd1943742cb67d1e9c58b5b" Netns:"/var/run/netns/cni-1d80f6e3-fdab-4505-eb83-7deb17431293" IfName:"eth0" Args:"IgnoreUnknown=1;K8S_POD_NAMESPACE=virtualmachines;K8S_POD_NAME=samplepod;K8S_POD_INFRA_CONTAINER_ID=3a6a58386dfbf2471a6f86bd41e4e9a32aac54ccccd1943742cb67d1e9c58b5b;K8S_POD_UID=8304765e-fd7e-4968-9144-c42c53be04f4" Path:"" ERRORED: error configuring pod [virtualmachines/samplepod] networking: [virtualmachines/samplepod/8304765e-fd7e-4968-9144-c42c53be04f4:cbr0]: error adding container to network "cbr0": DelegateAdd: cannot set "" interface name to "eth0": validateIfName: no net namespace /var/run/netns/cni-1d80f6e3-fdab-4505-eb83-7deb17431293 found: failed to Statfs "/var/run/netns/cni-1d80f6e3-fdab-4505-eb83-7deb17431293": no such file or directory
': StdinData: {"capabilities":{"portMappings":true},"clusterNetwork":"/host/etc/cni/net.d/10-flannel.conflist","cniVersion":"0.3.1","logLevel":"verbose","logToStderr":true,"name":"multus-cni-network","type":"multus-shim"}
```

As of March 21, 2025, Multus has a [bug](https://github.com/k8snetworkplumbingwg/multus-cni/issues/1221) in the `install-multus-binary` container that can lead to race conditions after a node reboot.
To prevent this issue, it is necessary to patch this container.
Set the following command on the `install-multus-binary` container:

```yaml
initContainers:
  - name: install-multus-binary
    command:
      - "/usr/src/multus-cni/bin/install_multus"
      - "-d"
      - "/host/opt/cni/bin"
      - "-t"
      - "thick"
```
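
One way to apply such a change, assuming the snippet above is saved (nested under the pod template's `spec`) as `multus-init-patch.yaml`; the file name and patch layout are illustrative:

```bash
kubectl -n kube-system patch daemonset kube-multus-ds \
  --type strategic \
  --patch-file multus-init-patch.yaml
```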

### Creating your `NetworkAttachmentDefinition`

The `NetworkAttachmentDefinition` configuration is used to define the bridge to which the second pod interface will be attached:

```yaml
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
  name: macvlan-conf
spec:
  config: '{
      "cniVersion": "0.3.0",
      "type": "macvlan",
      "master": "eth0",
      "mode": "bridge",
      "ipam": {
        "type": "host-local",
        "subnet": "192.168.1.0/24",
        "rangeStart": "192.168.1.200",
        "rangeEnd": "192.168.1.216",
        "routes": [
          { "dst": "0.0.0.0/0" }
        ],
        "gateway": "192.168.1.1"
      }
    }'
```

In this example `macvlan` is used as a bridge type.
There are three types of bridges: `bridge`, `macvlan` and `ipvlan`:

1. `bridge` is a way to connect two Ethernet segments together in a protocol-independent way.
   Packets are forwarded based on Ethernet address, rather than IP address (like a router).
   Since forwarding is done at Layer 2, all protocols can go transparently through a bridge.
   In terms of containers or virtual machines, a bridge can also be used to connect the virtual interfaces of each container/VM to the host network, allowing them to communicate.

2. `macvlan` is a driver that makes it possible to create virtual network interfaces that appear as distinct physical devices, each with a unique MAC address.
   The underlying interface can route traffic to each of these virtual interfaces separately, as if they were separate physical devices.
   This means that each macvlan interface can have its own IP subnet and routing.
   Macvlan interfaces are ideal for situations where containers or virtual machines require the same network access as the host system.

3. `ipvlan` is similar to `macvlan`, with the key difference being that ipvlan shares the parent's MAC address, which requires less configuration from the networking equipment.
   This makes deployments simpler in certain situations where MAC address control or limits are in place.
   It offers two operational modes: L2 mode (the default), where it behaves similarly to a macvlan, and L3 mode for routing-based traffic isolation (rather than bridging).

When using the `bridge` interface you must also configure a bridge on your Talos nodes.
That can be done by updating the Talos Linux machine configuration:

```yaml
machine:
  network:
    interfaces:
      - interface: br0
        addresses:
          - 172.16.1.60/24
        bridge:
          stp:
            enabled: true
          interfaces:
            - eno1 # This must be changed to your matching interface name
        routes:
          - network: 0.0.0.0/0 # The route's network (destination).
            gateway: 172.16.1.254 # The route's gateway (if empty, creates link scope route).
            metric: 1024 # The optional metric for the route.
```

More information about the configuration of bridges can be found [here](https://github.com/k8snetworkplumbingwg/multus-cni/tree/master/docs).

## Attaching the `NetworkAttachmentDefinition` to your `Pod` or `Deployment`

After the `NetworkAttachmentDefinition` is configured, you can attach that interface to your `Deployment` or `Pod`.
In this example we use a pod:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: samplepod
  annotations:
    k8s.v1.cni.cncf.io/networks: macvlan-conf
spec:
  containers:
  - name: samplepod
    command: ["/bin/ash", "-c", "trap : TERM INT; sleep infinity & wait"]
    image: alpine
```
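
Once the pod is running, the secondary interface should be visible inside it; Multus names additional attachments `net1`, `net2`, and so on:

```bash
# Inspect the secondary interface (requires the ip tool inside the image;
# busybox's ip applet in alpine is sufficient).
kubectl exec samplepod -- ip addr show net1
```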

## Notes on using KubeVirt in combination with Multus

If you would like to use KubeVirt and expose your virtual machine to the outside world with Multus, make sure to configure a `bridge` instead of `macvlan` or `ipvlan`, because those do not work, according to the KubeVirt [documentation](https://kubevirt.io/user-guide/virtual_machines/interfaces_and_networks/#invalid-cnis-for-secondary-networks):

> Invalid CNIs for secondary networks
> The following list of CNIs is known not to work for bridge interfaces - which are most common for secondary interfaces.
>
> * macvlan
> * ipvlan
>
> The reason is similar: the bridge interface type moves the pod interface MAC address to the VM, leaving the pod interface with a different address.
> The aforementioned CNIs require the pod interface to have the original MAC address.

## Notes on using Cilium in combination with Multus

### CNI reference plugins

Cilium does not ship the CNI reference plugins, which most Multus setups expect (e.g. `macvlan`).
This can be addressed by extending the DaemonSet with an additional init container that installs them, e.g. using the following kustomize strategic-merge patch:

```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-multus-ds
  namespace: kube-system
spec:
  template:
    spec:
      initContainers:
        - command:
            - /install-cni.sh
          image: ghcr.io/siderolabs/install-cni:v1.7.0 # adapt to your talos version
          name: install-cni
          securityContext:
            privileged: true
          volumeMounts:
            - mountPath: /host/opt/cni/bin
              mountPropagation: Bidirectional
              name: cnibin
```

### Exclusive CNI

By default, Cilium is an exclusive CNI, meaning it removes other CNI configuration files.
However, when using Multus, this behavior needs to be disabled.
To do so, set the Helm variable `cni.exclusive=false`.
For more information, refer to the [Cilium documentation](https://docs.cilium.io/en/stable/network/kubernetes/configuration/#adjusting-cni-configuration).
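
For an existing installation this can be flipped in place; a sketch, assuming Cilium was installed as the Helm release `cilium` in `kube-system`:

```bash
helm upgrade cilium cilium/cilium \
  --namespace kube-system \
  --reuse-values \
  --set cni.exclusive=false
```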

## Notes on ARM64 nodes

The official images (as of 2024-07-29) are built incorrectly for ARM64 ([ref](https://github.com/k8snetworkplumbingwg/multus-cni/issues/1251)).
Self-building them is an adequate workaround for now.
387
website/content/v1.12/kubernetes-guides/upgrading-kubernetes.md
Normal file
387
website/content/v1.12/kubernetes-guides/upgrading-kubernetes.md
Normal file
@ -0,0 +1,387 @@

---
title: "Upgrading Kubernetes"
description: "Guide on how to upgrade the Kubernetes cluster from Talos Linux."
aliases:
  - guides/upgrading-kubernetes
---

This guide covers upgrading Kubernetes on Talos Linux clusters.

For a list of Kubernetes versions compatible with each Talos release, see the [Support Matrix]({{< relref "../introduction/support-matrix" >}}).

For upgrading the Talos Linux operating system, see [Upgrading Talos]({{< relref "../talos-guides/upgrading-talos" >}}).

## Video Walkthrough

To see a demo of this process, watch this video:

<iframe width="560" height="315" src="https://www.youtube.com/embed/uOKveKbD8MQ" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

## Automated Kubernetes Upgrade

The recommended method to upgrade Kubernetes is to use the `talosctl upgrade-k8s` command.
This will automatically update the components needed to upgrade Kubernetes safely.
Upgrading Kubernetes is non-disruptive to the cluster workloads.

To trigger a Kubernetes upgrade, issue a command specifying the version of Kubernetes to upgrade to, such as:

`talosctl --nodes <controlplane node> upgrade-k8s --to {{< k8s_release >}}`

Note that the `--nodes` parameter specifies the control plane node to send the API call to, but all members of the cluster will be upgraded.

To check what will be upgraded you can run `talosctl upgrade-k8s` with the `--dry-run` flag:

```bash
$ talosctl --nodes <controlplane node> upgrade-k8s --to {{< k8s_release >}} --dry-run
WARNING: found resources which are going to be deprecated/migrated in the version {{< k8s_release >}}
RESOURCE                                                               COUNT
validatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io   4
mutatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io     3
customresourcedefinitions.v1beta1.apiextensions.k8s.io                 25
apiservices.v1beta1.apiregistration.k8s.io                             54
leases.v1beta1.coordination.k8s.io                                     4
automatically detected the lowest Kubernetes version {{< k8s_prev_release >}}
checking for resource APIs to be deprecated in version {{< k8s_release >}}
discovered controlplane nodes ["172.20.0.2" "172.20.0.3" "172.20.0.4"]
discovered worker nodes ["172.20.0.5" "172.20.0.6"]
updating "kube-apiserver" to version "{{< k8s_release >}}"
 > "172.20.0.2": starting update
 > update kube-apiserver: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
 > skipped in dry-run
 > "172.20.0.3": starting update
 > update kube-apiserver: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
 > skipped in dry-run
 > "172.20.0.4": starting update
 > update kube-apiserver: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
 > skipped in dry-run
updating "kube-controller-manager" to version "{{< k8s_release >}}"
 > "172.20.0.2": starting update
 > update kube-controller-manager: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
 > skipped in dry-run
 > "172.20.0.3": starting update

<snip>

updating manifests
 > apply manifest Secret bootstrap-token-3lb63t
 > apply skipped in dry run
 > apply manifest ClusterRoleBinding system-bootstrap-approve-node-client-csr
 > apply skipped in dry run
<snip>
```

To upgrade Kubernetes from v{{< k8s_prev_release >}} to v{{< k8s_release >}} run:

```bash
$ talosctl --nodes <controlplane node> upgrade-k8s --to {{< k8s_release >}}
automatically detected the lowest Kubernetes version {{< k8s_prev_release >}}
checking for resource APIs to be deprecated in version {{< k8s_release >}}
discovered controlplane nodes ["172.20.0.2" "172.20.0.3" "172.20.0.4"]
discovered worker nodes ["172.20.0.5" "172.20.0.6"]
updating "kube-apiserver" to version "{{< k8s_release >}}"
 > "172.20.0.2": starting update
 > update kube-apiserver: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
 > "172.20.0.2": machine configuration patched
 > "172.20.0.2": waiting for API server state pod update
 < "172.20.0.2": successfully updated
 > "172.20.0.3": starting update
 > update kube-apiserver: v{{< k8s_prev_release >}} -> {{< k8s_release >}}
<snip>
```

This command runs in several phases:

1. Images for new Kubernetes components are pre-pulled to the nodes to minimize downtime and test for image availability.
2. Every control plane node machine configuration is patched with the new image version for each control plane component.
   Talos renders new static pod definitions on the configuration update, which is picked up by the kubelet.
   The command waits for the change to propagate to the API server state.
3. The command updates the `kube-proxy` daemonset with the new image version.
4. On every node in the cluster, the `kubelet` version is updated.
   The command then waits for the `kubelet` service to be restarted and become healthy.
   The update is verified by checking the `Node` resource state.
5. Kubernetes bootstrap manifests are re-applied to the cluster.
   Updated bootstrap manifests might come with a new Talos version (e.g. a CoreDNS version update), or might be the result of a machine configuration change.

> Note: The `upgrade-k8s` command never deletes any resources from the cluster: they should be deleted manually.

If the command fails for any reason, it can be safely restarted to continue the upgrade process from the moment of the failure.

> Note: When using custom/overridden Kubernetes component images, use the `--*-image` flags to override the default image names.
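
A sketch of such an override, assuming a mirrored kubelet image (the flag name follows the `--*-image` pattern above; verify it against `talosctl upgrade-k8s --help` for your talosctl version, and treat the registry path as a placeholder):

```bash
talosctl --nodes <controlplane node> upgrade-k8s \
  --to {{< k8s_release >}} \
  --kubelet-image registry.example.com/mirror/kubelet
```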

## Manual Kubernetes Upgrade

Kubernetes can be upgraded manually by following the steps outlined below.
They are equivalent to the steps performed by the `talosctl upgrade-k8s` command.

### Kubeconfig

In order to edit the control plane, you need a working `kubectl` config.
If you don't already have one, you can get one by running:

```bash
talosctl --nodes <controlplane node> kubeconfig
```

### API Server

Patch the machine configuration using the `talosctl patch` command:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> patch mc --mode=no-reboot -p '[{"op": "replace", "path": "/cluster/apiServer/image", "value": "registry.k8s.io/kube-apiserver:v{{< k8s_release >}}"}]'
patched mc at the node 172.20.0.2
```

The JSON patch might need to be adjusted if the current machine configuration is missing the `.cluster.apiServer.image` key.

Also the machine configuration can be edited manually with `talosctl -n <IP> edit mc --mode=no-reboot`.

Capture the new version of the `kube-apiserver` config with:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> get apiserverconfig -o yaml
node: 172.20.0.2
metadata:
    namespace: controlplane
    type: APIServerConfigs.kubernetes.talos.dev
    id: kube-apiserver
    version: 5
    owner: k8s.ControlPlaneAPIServerController
    phase: running
spec:
    image: registry.k8s.io/kube-apiserver:v{{< k8s_release >}}
    cloudProvider: ""
    controlPlaneEndpoint: https://172.20.0.1:6443
    etcdServers:
        - https://localhost:2379
    localPort: 6443
    serviceCIDR:
        - 10.96.0.0/12
    extraArgs: {}
    extraVolumes: []
    environmentVariables: {}
    podSecurityPolicyEnabled: false
    advertisedAddress: $(POD_IP)
    resources:
        requests:
            cpu: ""
            memory: ""
        limits: {}
```

In this example, the new version is `5`.
Wait for the new pod definition to propagate to the API server state (replace `talos-default-controlplane-1` with the node name):

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-apiserver --field-selector spec.nodeName=talos-default-controlplane-1 -o jsonpath='{.items[0].metadata.annotations.talos\.dev/config\-version}'
5
```

Check that the pod is running:

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-apiserver --field-selector spec.nodeName=talos-default-controlplane-1
NAME                                          READY   STATUS    RESTARTS   AGE
kube-apiserver-talos-default-controlplane-1   1/1     Running   0          16m
```

Repeat this process for every control plane node, verifying that the state propagated successfully between each node update.

### Controller Manager

Patch the machine configuration using the `talosctl patch` command:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> patch mc --mode=no-reboot -p '[{"op": "replace", "path": "/cluster/controllerManager/image", "value": "registry.k8s.io/kube-controller-manager:v{{< k8s_release >}}"}]'
patched mc at the node 172.20.0.2
```

The JSON patch might need to be adjusted if the current machine configuration is missing the `.cluster.controllerManager.image` key.

Capture the new version of the `kube-controller-manager` config with:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> get controllermanagerconfig -o yaml
node: 172.20.0.2
metadata:
    namespace: controlplane
    type: ControllerManagerConfigs.kubernetes.talos.dev
    id: kube-controller-manager
    version: 3
    owner: k8s.ControlPlaneControllerManagerController
    phase: running
spec:
    enabled: true
    image: registry.k8s.io/kube-controller-manager:v{{< k8s_release >}}
    cloudProvider: ""
    podCIDRs:
        - 10.244.0.0/16
    serviceCIDRs:
        - 10.96.0.0/12
    extraArgs: {}
    extraVolumes: []
    environmentVariables: {}
    resources:
        requests:
            cpu: ""
            memory: ""
        limits: {}
```

In this example, the new version is `3`.
Wait for the new pod definition to propagate to the API server state (replace `talos-default-controlplane-1` with the node name):

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-controller-manager --field-selector spec.nodeName=talos-default-controlplane-1 -o jsonpath='{.items[0].metadata.annotations.talos\.dev/config\-version}'
3
```

Check that the pod is running:

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-controller-manager --field-selector spec.nodeName=talos-default-controlplane-1
NAME                                                   READY   STATUS    RESTARTS   AGE
kube-controller-manager-talos-default-controlplane-1   1/1     Running   0          35m
```

Repeat this process for every control plane node, verifying that the state propagated successfully between each node update.

### Scheduler

Patch the machine configuration using the `talosctl patch` command:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> patch mc --mode=no-reboot -p '[{"op": "replace", "path": "/cluster/scheduler/image", "value": "registry.k8s.io/kube-scheduler:v{{< k8s_release >}}"}]'
patched mc at the node 172.20.0.2
```

The JSON patch might need to be adjusted if the current machine configuration is missing the `.cluster.scheduler.image` key.

Capture the new version of the `kube-scheduler` config with:

```bash
$ talosctl -n <CONTROL_PLANE_IP_1> get schedulerconfig -o yaml
node: 172.20.0.2
metadata:
    namespace: controlplane
    type: SchedulerConfigs.kubernetes.talos.dev
    id: kube-scheduler
    version: 3
    owner: k8s.ControlPlaneSchedulerController
    phase: running
    created: 2024-11-06T12:37:22Z
    updated: 2024-11-06T12:37:20Z
spec:
    enabled: true
    image: registry.k8s.io/kube-scheduler:v{{< k8s_release >}}
    extraArgs: {}
    extraVolumes: []
    environmentVariables: {}
    resources:
        requests:
            cpu: ""
            memory: ""
        limits: {}
    config: {}
```

In this example, the new version is `3`.
Wait for the new pod definition to propagate to the API server state (replace `talos-default-controlplane-1` with the node name):

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-scheduler --field-selector spec.nodeName=talos-default-controlplane-1 -o jsonpath='{.items[0].metadata.annotations.talos\.dev/config\-version}'
3
```

Check that the pod is running:

```bash
$ kubectl get pod -n kube-system -l k8s-app=kube-scheduler --field-selector spec.nodeName=talos-default-controlplane-1
NAME                                          READY   STATUS    RESTARTS   AGE
kube-scheduler-talos-default-controlplane-1   1/1     Running   0          39m
```

Repeat this process for every control plane node, verifying that the state propagated successfully between each node update.

### Proxy

In the proxy's `DaemonSet`, change:

```yaml
kind: DaemonSet
...
spec:
  ...
  template:
    ...
    spec:
      containers:
        - name: kube-proxy
          image: registry.k8s.io/kube-proxy:v{{< k8s_prev_release >}}
      tolerations:
        - ...
```

to:

```yaml
kind: DaemonSet
...
spec:
  ...
  template:
    ...
    spec:
      containers:
        - name: kube-proxy
          image: registry.k8s.io/kube-proxy:v{{< k8s_release >}}
      tolerations:
        - ...
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
```

To edit the `DaemonSet`, run:

```bash
kubectl edit daemonsets -n kube-system kube-proxy
```

### Bootstrap Manifests

Bootstrap manifests can be retrieved in a format which works for `kubectl` with the following command:

```bash
talosctl -n <controlplane IP> get manifests -o yaml | yq eval-all '.spec | .[] | splitDoc' - > manifests.yaml
```

Diff the manifests with the cluster:

```bash
kubectl diff -f manifests.yaml
```

Apply the manifests:

```bash
kubectl apply -f manifests.yaml
```

> Note: if some bootstrap resources were removed, they have to be removed from the cluster manually.

### kubelet

For every node, patch the machine configuration with the new kubelet version and wait for the kubelet to restart with the new version:

```bash
$ talosctl -n <IP> patch mc --mode=no-reboot -p '[{"op": "replace", "path": "/machine/kubelet/image", "value": "ghcr.io/siderolabs/kubelet:v{{< k8s_release >}}"}]'
patched mc at the node 172.20.0.2
```

Once `kubelet` restarts with the new configuration, confirm the upgrade with `kubectl get nodes <name>`:

```bash
$ kubectl get nodes talos-default-controlplane-1
NAME                           STATUS   ROLES           AGE    VERSION
talos-default-controlplane-1   Ready    control-plane   123m   v{{< k8s_release >}}
```
4
website/content/v1.12/learn-more/_index.md
Normal file
4
website/content/v1.12/learn-more/_index.md
Normal file
@ -0,0 +1,4 @@

---
title: "Learn More"
weight: 80
---
55
website/content/v1.12/learn-more/architecture.md
Normal file
55
website/content/v1.12/learn-more/architecture.md
Normal file
@ -0,0 +1,55 @@

---
title: "Architecture"
weight: 20
description: "Learn the system architecture of Talos Linux itself."
---

Talos is designed to be **atomic** in _deployment_ and **modular** in _composition_.

It is atomic in that the entirety of Talos is distributed as a single, self-contained image, which is versioned, signed, and immutable.

It is modular in that it is composed of many separate components with clearly defined gRPC interfaces, which facilitate internal flexibility and external operational guarantees.

All of the main Talos components communicate with each other by gRPC, through a socket on the local machine.
This imposes a clear separation of concerns and ensures that changes over time which affect the interoperation of components are a part of the public git record.
The benefit is that each component may be iterated and changed as its needs dictate, so long as the external API is controlled.
This is a key component in reducing coupling and maintaining modularity.

## File system partitions

Talos uses these partitions with the following labels:

1. **EFI** - stores EFI boot data.
1. **BIOS** - used for GRUB's second stage boot.
1. **BOOT** - used for the boot loader, stores initramfs and kernel data.
1. **META** - stores metadata about the Talos node, such as node IDs.
1. **STATE** - stores machine configuration, node identity data for cluster discovery, and KubeSpan info.
1. **EPHEMERAL** - stores ephemeral state information, mounted at `/var`.

## The File System

One of the unique design decisions in Talos is the layout of the root file system.
There are three "layers" to the Talos root file system.
At its core the rootfs is a read-only squashfs.
The squashfs is then mounted as a loop device into memory.
This provides Talos with an immutable base.

The next layer is a set of `tmpfs` file systems for runtime-specific needs.
Aside from the standard pseudo file systems such as `/dev`, `/proc`, `/run`, `/sys` and `/tmp`, a special `/system` is created for internal needs.
One reason for this is that we need special files such as `/etc/hosts` and `/etc/resolv.conf` to be writable (remember that the rootfs is read-only).
For example, at boot Talos will write `/system/etc/hosts` and then bind mount it over `/etc/hosts`.
This means that instead of making all of `/etc` writable, Talos only makes very specific files writable under `/etc`.
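
A rough sketch of that layering, expressed as the equivalent shell steps (illustrative only -- the paths, file names, and hosts content are assumptions for the example, not the actual Talos implementation):

```bash
# Immutable base: the read-only squashfs rootfs mounted via a loop device.
mount -t squashfs -o loop,ro rootfs.sqsh /

# Runtime-writable layer for internal needs.
mount -t tmpfs tmpfs /system
mkdir -p /system/etc

# Make a single file writable without making all of /etc writable.
echo "127.0.0.1 localhost" > /system/etc/hosts
mount --bind /system/etc/hosts /etc/hosts
```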

All files under `/system` are completely recreated on each boot.
For files and directories that need to persist across boots, Talos creates `overlayfs` file systems.
The `/etc/kubernetes` directory is a good example of this.
Directories like this are `overlayfs` backed by an XFS file system mounted at `/var`.

The `/var` directory is owned by Kubernetes, with the exception of the above `overlayfs` file systems.
This directory is writable and used by `etcd` (in the case of control plane nodes), the kubelet, and the CRI (containerd).
Its content survives machine reboots and machine upgrades, but it is wiped and lost on resets, unless the `--system-labels-to-wipe` option of [`talosctl reset`]({{< relref "../reference/cli#talosctl-reset" >}}) is used.
120
website/content/v1.12/learn-more/components.md
Normal file
120
website/content/v1.12/learn-more/components.md
Normal file
@ -0,0 +1,120 @@

---
title: "Components"
weight: 40
description: "Understand the system components that make up Talos Linux."
---

In this section, we discuss the various components that underpin Talos.

## Components

Talos Linux and Kubernetes are tightly integrated.

![Talos Linux and Kubernetes components]()

In the following, the focus is on the Talos Linux specific components.

| Component  | Description |
| ---------- | ----------- |
| apid       | When interacting with Talos, the gRPC API endpoint you interact with directly is provided by `apid`. `apid` acts as the gateway for all component interactions and forwards the requests to `machined`. |
| containerd | An industry-standard container runtime with an emphasis on simplicity, robustness, and portability. To learn more, see the [containerd website](https://containerd.io). |
| machined   | Talos replacement for the traditional Linux init-process. Specially designed to run Kubernetes and does not allow starting arbitrary user services. |
| kernel     | The Linux kernel included with Talos is configured according to the recommendations outlined in the [Kernel Self Protection Project](https://kspp.github.io/). |
| trustd     | To run and operate a Kubernetes cluster, a certain level of trust is required. Based on the concept of a 'Root of Trust', `trustd` is a simple daemon responsible for establishing trust within the system. |
| udevd      | Implementation of `eudev` in `machined`. `eudev` is Gentoo's fork of udev, systemd's device file manager for the Linux kernel. It manages device nodes in `/dev` and handles all user space actions when adding or removing devices. To learn more, see the [Gentoo Wiki](https://wiki.gentoo.org/wiki/Eudev). |

### apid

When interacting with Talos, the gRPC API endpoint you will interact with directly is `apid`.
Apid acts as the gateway for all component interactions.
Apid provides a mechanism to route requests to the appropriate destination when running on a control plane node.

We'll use some examples below to illustrate what `apid` is doing.

When a user wants to interact with a Talos component via `talosctl`, there are two flags that control the interaction with `apid`.
The `-e | --endpoints` flag specifies which Talos node (via `apid`) should handle the connection.
Typically this is a public-facing server.
The `-n | --nodes` flag specifies which Talos node(s) should respond to the request.
If `--nodes` is omitted, the first endpoint will be used.

> Note: Typically, there will be an `endpoint` already defined in the Talos config file.
> Optionally, `nodes` can be included here as well.

For example, if a user wants to interact with `machined`, a command like `talosctl -e cluster.talos.dev memory` may be used.

```bash
$ talosctl -e cluster.talos.dev memory
NODE                TOTAL   USED   FREE   SHARED   BUFFERS   CACHE   AVAILABLE
cluster.talos.dev   7938    1768   2390   145      53        3724    6571
```

In this case, `talosctl` is interacting with `apid` running on `cluster.talos.dev` and forwarding the request to the `machined` API.

If we wanted to extend our example to retrieve `memory` from another node in our cluster, we could use the command `talosctl -e cluster.talos.dev -n node02 memory`.

```bash
$ talosctl -e cluster.talos.dev -n node02 memory
NODE     TOTAL   USED   FREE   SHARED   BUFFERS   CACHE   AVAILABLE
node02   7938    1768   2390   145      53        3724    6571
```

The `apid` instance on `cluster.talos.dev` receives the request and forwards it to `apid` running on `node02`, which forwards the request to the `machined` API.

We can further extend our example to retrieve `memory` for all nodes in our cluster by appending additional `-n node` flags or using a comma-separated list of nodes (`-n node01,node02,node03`):

```bash
$ talosctl -e cluster.talos.dev -n node01 -n node02 -n node03 memory
NODE     TOTAL    USED    FREE     SHARED   BUFFERS   CACHE   AVAILABLE
node01   7938     871     4071     137      49        2945    7042
node02   257844   14408   190796   18138    49        52589   227492
node03   257844   1830    255186   125      49        777     254556
```

The `apid` instance on `cluster.talos.dev` receives the request and forwards it to `node01`, `node02`, and `node03`, which then forward the request to their local `machined` API.

### containerd

[Containerd](https://github.com/containerd/containerd) provides the container runtime to launch workloads on Talos and Kubernetes.

Talos services are namespaced under the `system` namespace in containerd, whereas the Kubernetes services are namespaced under the `k8s.io` namespace.

### machined

A common theme throughout the design of Talos is minimalism.
We believe strongly in the UNIX philosophy that each program should do one job well.
The `init` included in Talos is one example of this, and we are calling it "`machined`".

We wanted to create a focused `init` that had one job - run Kubernetes.
To that extent, `machined` is relatively static in that it does not allow for arbitrary user-defined services.
Only the services necessary to run Kubernetes and manage the node are available.
This includes:

- containerd
- etcd
- [kubelet](https://kubernetes.io/docs/concepts/overview/components/)
- networkd
- trustd
- udevd

The `machined` process handles all machine configuration, API handling, and resource and controller management.

### kernel

The Linux kernel included with Talos is configured according to the recommendations outlined in the Kernel Self Protection Project ([KSPP](https://kspp.github.io/)).

### trustd

Security is one of the highest priorities within Talos.
To run a Kubernetes cluster, a certain level of trust is required to operate a cluster.
For example, orchestrating the bootstrap of a highly available control plane requires sensitive PKI data distribution.

To that end, we created `trustd`.
Based on a Root of Trust concept, `trustd` is a simple daemon responsible for establishing trust within the system.
Once trust is established, various methods become available to the trustee.
For example, it can accept a write request from another node to place a file on disk.

Additional methods and capabilities will be added to the `trustd` component to support new functionality in the rest of the Talos environment.

### udevd

Udevd handles kernel device notifications and sets up the necessary links in `/dev`.
145
website/content/v1.12/learn-more/control-plane.md
Normal file
145
website/content/v1.12/learn-more/control-plane.md
Normal file
@ -0,0 +1,145 @@

---
title: "Control Plane"
weight: 50
description: "Understand the Kubernetes Control Plane."
---

This guide provides information about the Kubernetes control plane, and details on how Talos runs and bootstraps the Kubernetes control plane.

<!-- markdownlint-disable MD026 -->

## What is a control plane node?

A control plane node is a node which:

- runs etcd, the Kubernetes database
- runs the Kubernetes control plane
  - kube-apiserver
  - kube-controller-manager
  - kube-scheduler
- serves as an administrative proxy to the worker nodes

These nodes are critical to the operation of your cluster.
Without control plane nodes, Kubernetes will not respond to changes in the system, and certain central services may not be available.

Talos nodes which have `.machine.type` of `controlplane` are control plane nodes.
(check via `talosctl get member`)

Control plane nodes are tainted by default to prevent workloads from being scheduled onto them.
This is both to protect the control plane from workloads consuming resources and starving the control plane processes, and also to reduce the risk that a vulnerability exposes the control plane's credentials to a workload.

## The Control Plane and Etcd

A critical design concept of Kubernetes (and Talos) is the `etcd` database.

Properly managed (which Talos Linux does), `etcd` should never have split brain or noticeable down time.
In order to do this, `etcd` maintains the concepts of "membership" and of "quorum".
To perform any operation, read or write, the database requires quorum.
That is, a majority of members must agree on the current leader, and absenteeism (members that are down, or not reachable) counts as a negative.
For example, if there are three members, at least two out of the three must agree on the current leader.
If two disagree or fail to answer, the `etcd` database will lock itself until quorum is achieved in order to protect the integrity of the data.

This design means that having two control plane nodes is _worse_ than having only one, because if _either_ goes down, your database will lock (and the chance of one of two nodes going down is greater than the chance of just a single node going down).
Similarly, a 4 node etcd cluster is worse than a 3 node etcd cluster - a 4 node cluster requires 3 nodes to be up to achieve quorum (in order to have a majority), while the 3 node cluster requires 2 nodes:
i.e. both can support a single node failure and keep running - but the chance of a node failing in a 4 node cluster is higher than that in a 3 node cluster.
|
||||
|
||||
Another note about etcd: because data must be replicated amongst members, the performance of etcd _decreases_ as the cluster scales.
A 5-node cluster can commit about 5% fewer writes per second than a 3-node cluster running on the same hardware.
|
||||
|
||||
## Recommendations for your control plane
|
||||
|
||||
- Run your clusters with three or five control plane nodes.
|
||||
Three is enough for most use cases.
|
||||
Five will give you better availability (it can tolerate two simultaneous node failures), but costs more, both in the number of nodes required and in the hardware resources each node may need to offset the performance degradation seen in larger clusters.
|
||||
- Implement good monitoring, and put processes in place to deal with a failed node in a timely manner (and test them!).
|
||||
- Even with robust monitoring and procedures for replacing failed nodes in place, backup etcd and your control plane node configuration to guard against unforeseen disasters.
|
||||
- Monitor the performance of your etcd clusters.
|
||||
If etcd performance is slow, scale the nodes vertically (more resources per node), not horizontally (more nodes).
|
||||
- If a control plane node fails, remove it first, then add the replacement node.
|
||||
(This ensures that the failed node does not "vote" when adding in the new node, minimizing the chances of a quorum violation.)
|
||||
- If replacing a node that has not failed, add the new one, then remove the old.
|
||||
|
||||
## Bootstrapping the Control Plane
|
||||
|
||||
Every new cluster must be bootstrapped only once, which is achieved by telling a single control plane node to initiate the bootstrap.
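A minimal sketch of the bootstrap call, assuming a configured `talosconfig` and a control plane node at `172.20.0.2` (the address is a placeholder):

```bash
# Tell exactly one control plane node to form the etcd cluster.
talosctl --nodes 172.20.0.2 bootstrap
```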
|
||||
|
||||
Bootstrapping itself does not do anything with Kubernetes.
|
||||
Bootstrapping only tells `etcd` to form a cluster, so don't judge the success of
|
||||
a bootstrap by the failure of Kubernetes to start.
|
||||
Kubernetes relies on `etcd`, so bootstrapping is _required_, but it is not
|
||||
_sufficient_ for Kubernetes to start.
|
||||
If your Kubernetes cluster fails to form for other reasons (say, a bad configuration option or an unavailable container repository), and the bootstrap API call returned successfully, you do NOT need to bootstrap again: just fix the configuration or let Kubernetes retry.
|
||||
|
||||
### High-level Overview
|
||||
|
||||
Talos cluster bootstrap flow:
|
||||
|
||||
1. The `etcd` service is started on control plane nodes.
|
||||
Instances of `etcd` on control plane nodes build the `etcd` cluster.
|
||||
2. The `kubelet` service is started.
|
||||
3. Control plane components are started as static pods via the `kubelet`, and the `kube-apiserver` component connects to the local (running on the same node) `etcd` instance.
|
||||
4. The `kubelet` obtains a client certificate using the bootstrap token via the control plane endpoint (through `kube-apiserver` and `kube-controller-manager`).
|
||||
5. The `kubelet` registers the node in the API server.
|
||||
6. Kubernetes control plane schedules pods on the nodes.
|
||||
|
||||
### Cluster Bootstrapping
|
||||
|
||||
All nodes start the `kubelet` service.
|
||||
The `kubelet` tries to contact the control plane endpoint, but as it is not up yet, it keeps retrying.
|
||||
|
||||
One of the control plane nodes is chosen as the bootstrap node, and promoted using the bootstrap API (`talosctl bootstrap`).
|
||||
The bootstrap node initiates the `etcd` bootstrap process by initializing `etcd` as the first member of the cluster.
|
||||
|
||||
> Once `etcd` is bootstrapped, the bootstrap node has no special role and acts the same way as other control plane nodes.
|
||||
|
||||
The `etcd` services on non-bootstrap nodes try to get the `Endpoints` resource via the control plane endpoint, but that request fails, as the control plane endpoint is not up yet.
|
||||
|
||||
As soon as `etcd` is up on the bootstrap node, static pod definitions for the Kubernetes control plane components (`kube-apiserver`, `kube-controller-manager`, `kube-scheduler`) are rendered to disk.
|
||||
The `kubelet` service on the bootstrap node picks up the static pod definitions and starts the Kubernetes control plane components.
|
||||
As soon as `kube-apiserver` is launched, the control plane endpoint comes up.
|
||||
|
||||
The bootstrap node acquires an `etcd` mutex and injects the bootstrap manifests into the API server.
|
||||
The bootstrap manifests specify the Kubernetes join token and kubelet CSR auto-approval.
The `kubelet` service on each node is now able to obtain a client certificate for itself and register its node with the API server.
|
||||
|
||||
Other bootstrap manifests specify additional resources critical for Kubernetes operations (e.g. CNI, PSP).
|
||||
|
||||
The `etcd` service on non-bootstrap nodes is now able to discover other members of the `etcd` cluster via the Kubernetes `Endpoints` resource.
|
||||
The `etcd` cluster is now formed and consists of all control plane nodes.
|
||||
|
||||
All control plane nodes render static pod manifests for the control plane components.
|
||||
Each node now runs a full set of components to make the control plane HA.
|
||||
|
||||
The `kubelet` service on worker nodes is now able to obtain its client certificate and register the node with the API server.
|
||||
|
||||
### Scaling Up the Control Plane
|
||||
|
||||
When new nodes are added to the control plane, the process is the same as the bootstrap process above: the `etcd` service discovers existing members of the control plane via the
|
||||
control plane endpoint, joins the `etcd` cluster, and the control plane components are scheduled on the node.
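For example, a new control plane node is typically brought in by applying the existing control plane machine configuration to it (a sketch; the node address and file name are placeholders):

```bash
talosctl apply-config --insecure --nodes 172.20.0.6 --file controlplane.yaml
```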
|
||||
|
||||
### Scaling Down the Control Plane
|
||||
|
||||
Scaling down the control plane involves removing a node from the cluster.
|
||||
The most critical part is making sure that the node which is being removed leaves the etcd cluster.
|
||||
The recommended way to do this is to use:
|
||||
|
||||
- `talosctl -n IP.of.node.to.remove reset`
|
||||
- `kubectl delete node`
|
||||
|
||||
When using the `talosctl reset` command, the targeted control plane node leaves the `etcd` cluster as part of the reset sequence, and its disks are erased.
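For example, removing a control plane node might look like this (node address and node name are placeholders):

```bash
# Have the node leave etcd, wipe its ephemeral data, and reboot.
talosctl --nodes 172.20.0.5 reset --graceful --reboot

# Remove the corresponding Node object from Kubernetes.
kubectl delete node talos-default-controlplane-3
```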
|
||||
|
||||
### Upgrading Talos on Control Plane Nodes
|
||||
|
||||
When a control plane node is upgraded, Talos leaves `etcd`, wipes the system disk, installs a new version of itself, and reboots.
|
||||
The upgraded node then joins the `etcd` cluster on reboot.
|
||||
So upgrading a control plane node is equivalent to scaling down that node, followed by scaling up with a new version of Talos.
|
||||
website/content/v1.12/learn-more/controllers-resources.md (new file, 230 lines)
@ -0,0 +1,230 @@
|
||||
---
|
||||
title: "Controllers and Resources"
|
||||
weight: 60
|
||||
description: "Discover how Talos Linux uses the concepts on Controllers and Resources."
|
||||
---
|
||||
|
||||
<!-- markdownlint-disable MD038 -->
|
||||
|
||||
Talos implements the concepts of *resources* and *controllers* to facilitate the internal operations of the operating system.
|
||||
Talos resources and controllers are very similar to Kubernetes resources and controllers, but there are some differences.
|
||||
The content of this document is not required to operate Talos, but it is useful for troubleshooting.
|
||||
|
||||
Starting with Talos 0.9, most of the Kubernetes control plane bootstrapping and operations are implemented via controllers and resources, which allows Talos to react to configuration changes and environment changes (e.g. time sync).
|
||||
|
||||
## Resources
|
||||
|
||||
A resource captures a piece of system state.
Each resource belongs to a "Type" which defines the resource contents.
Resource state can be split into two parts:

* metadata: a fixed set of fields describing the resource - namespace, type, ID, etc.
* spec: the contents of the resource (depends on the resource type).
|
||||
|
||||
A resource is uniquely identified by the tuple (`namespace`, `type`, `id`).
|
||||
Namespaces provide a way to avoid conflicts on duplicate resource IDs.
|
||||
|
||||
At the time of writing, all resources are local to the node and stored in memory, so on every reboot the resource state is rebuilt from scratch (the only exception is the `MachineConfig` resource, which reflects the current machine config).
|
||||
|
||||
## Controllers
|
||||
|
||||
Controllers run as independent lightweight threads in Talos.
|
||||
The goal of the controller is to reconcile the state based on inputs and eventually update outputs.
|
||||
|
||||
A controller can have any number of resource types (and namespaces) as inputs.
|
||||
In other words, it watches specified resources for changes and reconciles when these changes occur.
|
||||
A controller might also have additional inputs: running the reconcile loop on a schedule, watching `etcd` keys, etc.
|
||||
|
||||
A controller has a single output: a set of resources of a fixed type in a fixed namespace.
Only one controller can manage a given resource type in a given namespace, so conflicts are avoided.
|
||||
|
||||
## Querying Resources
|
||||
|
||||
The Talos CLI tool `talosctl` provides read-only access to the resource API, which includes getting a specific resource, listing resources, and watching for changes.
|
||||
|
||||
Talos stores resources describing resource types and namespaces in the `meta` namespace:
|
||||
|
||||
```bash
|
||||
$ talosctl get resourcedefinitions
|
||||
NODE NAMESPACE TYPE ID VERSION
|
||||
172.20.0.2 meta ResourceDefinition bootstrapstatuses.v1alpha1.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition etcdsecrets.secrets.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition kubernetescontrolplaneconfigs.config.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition kubernetessecrets.secrets.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition machineconfigs.config.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition machinetypes.config.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition manifests.kubernetes.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition manifeststatuses.kubernetes.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition namespaces.meta.cosi.dev 1
|
||||
172.20.0.2 meta ResourceDefinition resourcedefinitions.meta.cosi.dev 1
|
||||
172.20.0.2 meta ResourceDefinition rootsecrets.secrets.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition secretstatuses.kubernetes.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition services.v1alpha1.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition staticpods.kubernetes.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition staticpodstatuses.kubernetes.talos.dev 1
|
||||
172.20.0.2 meta ResourceDefinition timestatuses.v1alpha1.talos.dev 1
|
||||
```
|
||||
|
||||
```bash
|
||||
$ talosctl get namespaces
|
||||
NODE NAMESPACE TYPE ID VERSION
|
||||
172.20.0.2 meta Namespace config 1
|
||||
172.20.0.2 meta Namespace controlplane 1
|
||||
172.20.0.2 meta Namespace meta 1
|
||||
172.20.0.2 meta Namespace runtime 1
|
||||
172.20.0.2 meta Namespace secrets 1
|
||||
```
|
||||
|
||||
Most of the time the namespace flag (`--namespace`) can be omitted, as the `ResourceDefinition` contains a default namespace which is used if no namespace is given:
|
||||
|
||||
```bash
|
||||
$ talosctl get resourcedefinitions resourcedefinitions.meta.cosi.dev -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: meta
|
||||
type: ResourceDefinitions.meta.cosi.dev
|
||||
id: resourcedefinitions.meta.cosi.dev
|
||||
version: 1
|
||||
phase: running
|
||||
spec:
|
||||
type: ResourceDefinitions.meta.cosi.dev
|
||||
displayType: ResourceDefinition
|
||||
aliases:
|
||||
- resourcedefinitions
|
||||
- resourcedefinition
|
||||
- resourcedefinitions.meta
|
||||
- resourcedefinitions.meta.cosi
|
||||
- rd
|
||||
- rds
|
||||
printColumns: []
|
||||
defaultNamespace: meta
|
||||
```
|
||||
|
||||
The resource definition also contains type aliases, which can be used interchangeably with the canonical resource name:
|
||||
|
||||
```bash
|
||||
$ talosctl get ns config
|
||||
NODE NAMESPACE TYPE ID VERSION
|
||||
172.20.0.2 meta Namespace config 1
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
The `talosctl get` command supports the following output modes:
|
||||
|
||||
* `table` (default) prints resource list as a table
|
||||
* `yaml` prints pretty-formatted resources with details, including the full metadata and spec.
  This format carries most details from the backend resource (e.g. comments in the `MachineConfig` resource).
* `json` prints the same information as `yaml`, though some details (e.g. comments) might be lost.
  This format is useful for automated processing with tools like `jq`.
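For instance, a hypothetical pipeline which extracts service IDs with `jq` (a sketch; the field names follow the `metadata` structure shown in the YAML examples below):

```bash
talosctl get services -o json | jq -r '.metadata.id'
```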
|
||||
|
||||
### Watching Changes
|
||||
|
||||
If the `--watch` flag is appended to the `talosctl get` command, the command switches to watch mode.
If a list of resources was requested, `talosctl` prints the initial contents of the list and then appends resource information for every change:
|
||||
|
||||
```bash
|
||||
$ talosctl get svc -w
|
||||
NODE * NAMESPACE TYPE ID VERSION RUNNING HEALTHY
|
||||
172.20.0.2 + runtime Service timed 2 true true
|
||||
172.20.0.2 + runtime Service trustd 2 true true
|
||||
172.20.0.2 + runtime Service udevd 2 true true
|
||||
172.20.0.2 - runtime Service timed 2 true true
|
||||
172.20.0.2 + runtime Service timed 1 true false
|
||||
172.20.0.2 runtime Service timed 2 true true
|
||||
```
|
||||
|
||||
The `*` column specifies the event type:
|
||||
|
||||
* `+` is created
|
||||
* `-` is deleted
|
||||
* ` ` is updated
|
||||
|
||||
In the YAML/JSON output, an `event` field is added to the resource representation to describe the event type.
|
||||
|
||||
### Examples
|
||||
|
||||
Getting machine config:
|
||||
|
||||
```bash
|
||||
$ talosctl get machineconfig v1alpha1 -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: config
|
||||
type: MachineConfigs.config.talos.dev
|
||||
id: v1alpha1
|
||||
version: 2
|
||||
phase: running
|
||||
spec:
|
||||
version: v1alpha1 # Indicates the schema used to decode the contents.
|
||||
debug: false # Enable verbose logging to the console.
|
||||
persist: true # Indicates whether to pull the machine config upon every boot.
|
||||
# Provides machine specific configuration options.
|
||||
...
|
||||
```
|
||||
|
||||
Getting control plane static pod statuses:
|
||||
|
||||
```bash
|
||||
$ talosctl get staticpodstatus
|
||||
NODE NAMESPACE TYPE ID VERSION READY
|
||||
172.20.0.2 controlplane StaticPodStatus kube-system/kube-apiserver-talos-default-controlplane-1 3 True
|
||||
172.20.0.2 controlplane StaticPodStatus kube-system/kube-controller-manager-talos-default-controlplane-1 3 True
|
||||
172.20.0.2 controlplane StaticPodStatus kube-system/kube-scheduler-talos-default-controlplane-1 4 True
|
||||
```
|
||||
|
||||
Getting static pod definition for `kube-apiserver`:
|
||||
|
||||
```bash
|
||||
$ talosctl get sp kube-apiserver -n 172.20.0.2 -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: controlplane
|
||||
type: StaticPods.kubernetes.talos.dev
|
||||
id: kube-apiserver
|
||||
version: 3
|
||||
phase: running
|
||||
finalizers:
|
||||
- k8s.StaticPodStatus("kube-apiserver")
|
||||
spec:
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
annotations:
|
||||
talos.dev/config-version: "1"
|
||||
talos.dev/secrets-version: "2"
|
||||
...
|
||||
```
|
||||
|
||||
## Inspecting Controller Dependencies
|
||||
|
||||
Talos can report current dependencies between controllers and resources for debugging purposes:
|
||||
|
||||
```bash
|
||||
$ talosctl inspect dependencies
|
||||
digraph {
|
||||
|
||||
n1[label="config.K8sControlPlaneController",shape="box"];
|
||||
n3[label="config.MachineTypeController",shape="box"];
|
||||
n2[fillcolor="azure2",label="config:KubernetesControlPlaneConfigs.config.talos.dev",shape="note",style="filled"];
|
||||
...
|
||||
```
|
||||
|
||||
This outputs the graph in `graphviz` format, which can be rendered to PNG with the following command:
|
||||
|
||||
```bash
|
||||
talosctl inspect dependencies | dot -T png > deps.png
|
||||
```
|
||||
|
||||

|
||||
|
||||
The graph can be enhanced by replacing resource types with actual resource instances:
|
||||
|
||||
```bash
|
||||
talosctl inspect dependencies --with-resources | dot -T png > deps.png
|
||||
```
|
||||
|
||||

|
||||
website/content/v1.12/learn-more/faqs.md (new file, 72 lines)
@ -0,0 +1,72 @@
|
||||
---
|
||||
title: "FAQs"
|
||||
weight: 999
|
||||
description: "Frequently Asked Questions about Talos Linux."
|
||||
---
|
||||
|
||||
<!-- markdownlint-disable MD026 -->
|
||||
|
||||
## How is Talos different from other container optimized Linux distros?
|
||||
|
||||
Talos integrates tightly with Kubernetes, and is not meant to be a general-purpose operating system.
|
||||
The most important difference is that Talos is fully controlled by an API via a gRPC interface, instead of an ordinary shell.
|
||||
We don't ship SSH, and there is no console access.
|
||||
Removing components such as these has allowed us to dramatically reduce the footprint of Talos, and in turn, improve a number of other areas like security, predictability, reliability, and consistency across platforms.
|
||||
It's a big change from how operating systems have been managed in the past, but we believe that API-driven OSes are the future.
|
||||
|
||||
## Why no shell or SSH?
|
||||
|
||||
Since Talos is fully API-driven, all maintenance and debugging operations are possible via the OS API.
|
||||
We would like for Talos users to start thinking about what a "machine" is in the context of a Kubernetes cluster.
|
||||
That is, that a Kubernetes _cluster_ can be thought of as one massive machine, and the _nodes_ are merely additional, undifferentiated resources.
|
||||
We don't want humans to focus on the _nodes_, but rather on the _machine_ that is the Kubernetes cluster.
|
||||
Should an issue arise at the node level, `talosctl` should provide the necessary tooling to assist in the identification, debugging, and remediation of the issue.
|
||||
However, the API is based on the Principle of Least Privilege, and exposes only a limited set of methods.
|
||||
We envision Talos being a great place for the application of [control theory](https://en.wikipedia.org/wiki/Control_theory) in order to provide a self-healing platform.
|
||||
|
||||
## Why the name "Talos"?
|
||||
|
||||
Talos was an automaton created by Hephaestus, the Greek god of the forge, to protect the island of Crete.
|
||||
He would patrol the coast and enforce laws throughout the land.
|
||||
We felt it was a fitting name for a security-focused operating system designed to run Kubernetes.
|
||||
|
||||
## Why does Talos rely on a separate configuration from Kubernetes?
|
||||
|
||||
The `talosconfig` file contains client credentials to access the Talos Linux API.
|
||||
Sometimes Kubernetes might be down for a number of reasons (etcd issues, misconfiguration, etc.), while Talos API access will always be available.
|
||||
The Talos API is a way to access the operating system and fix issues, e.g. fixing access to Kubernetes.
|
||||
When Talos Linux is running fine, using the Kubernetes APIs (via `kubeconfig`) is all you should need to deploy and manage Kubernetes workloads.
|
||||
|
||||
## How does Talos handle certificates?
|
||||
|
||||
During the machine config generation process, Talos generates a set of certificate authorities (CAs) that remains valid for 10 years.
|
||||
Talos is responsible for managing certificates for `etcd`, Talos API (`apid`), node certificates (`kubelet`), and other components.
|
||||
It also handles the automatic rotation of server-side certificates.
|
||||
|
||||
However, client certificates such as `talosconfig` and `kubeconfig` are the user's responsibility, and by default, they have a validity period of 1 year.
|
||||
|
||||
To renew the `talosconfig` certificate, follow [this process]({{< relref "../talos-guides/howto/cert-management" >}}).
To renew `kubeconfig`, use the `talosctl kubeconfig` command; the time-to-live (TTL) is defined in the [configuration]({{< relref "../reference/configuration/#adminkubeconfigconfig" >}}).
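For example (this merges fresh credentials into your local kubeconfig; the node address is a placeholder):

```bash
talosctl --nodes 172.20.0.2 kubeconfig
```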
|
||||
|
||||
## How can I set the timezone of my Talos Linux clusters?
|
||||
|
||||
Talos doesn't support timezones, and will always run in UTC.
|
||||
This ensures consistency of log timestamps for all Talos Linux clusters, simplifying debugging.
|
||||
Your containers can run with any timezone configuration you desire, but the timezone of Talos Linux is not configurable.
|
||||
|
||||
## How do I see Talos kernel configuration?
|
||||
|
||||
### Using Talos API
|
||||
|
||||
The current kernel config can be read with `talosctl -n <NODE> read /proc/config.gz`.
|
||||
|
||||
For example:
|
||||
|
||||
```shell
|
||||
talosctl -n NODE read /proc/config.gz | zgrep E1000
|
||||
```
|
||||
|
||||
### Using GitHub
|
||||
|
||||
For `amd64`, see https://github.com/siderolabs/pkgs/blob/main/kernel/build/config-amd64.
|
||||
Use the appropriate branch to see the kernel config matching your Talos release.
|
||||
website/content/v1.12/learn-more/image-factory.md (new file, 182 lines)
@ -0,0 +1,182 @@
|
||||
---
|
||||
title: "Image Factory"
|
||||
weight: 55
|
||||
description: "Image Factory generates customized Talos Linux images based on configured schematics."
|
||||
---
|
||||
|
||||
The Image Factory provides a way to download Talos Linux artifacts.
|
||||
Artifacts can be generated with customizations defined by a "schematic".
|
||||
A schematic can be applied to any of the versions of Talos Linux offered by the Image Factory to produce a "model".
|
||||
|
||||
The following assets are provided:
|
||||
|
||||
* ISO
|
||||
* `kernel`, `initramfs`, and kernel command line
|
||||
* UKI
|
||||
* disk images in various formats (e.g. AWS, GCP, VMware, etc.)
|
||||
* installer container images
|
||||
|
||||
The supported frontends are:
|
||||
|
||||
* HTTP
|
||||
* PXE
|
||||
* Container Registry
|
||||
|
||||
The official instance of Image Factory is available at https://factory.talos.dev.
|
||||
|
||||
See [Boot Assets]({{< relref "../talos-guides/install/boot-assets#image-factory" >}}) for an example of how to use the Image Factory to boot and upgrade Talos on different platforms.
|
||||
Full API documentation for the Image Factory is available at [GitHub](https://github.com/siderolabs/image-factory#readme).
|
||||
|
||||
## Schematics
|
||||
|
||||
Schematics are YAML files that define customizations to be applied to a Talos Linux image.
|
||||
Schematics can be applied to any of the versions of Talos Linux offered by the Image Factory to produce a "model", which is a Talos Linux image with the customizations applied.
|
||||
|
||||
Schematics are content-addressable, that is, the content of the schematic is used to generate a unique ID.
|
||||
The schematic should be uploaded to the Image Factory first, and then the ID can be used to reference the schematic in a model.
|
||||
|
||||
Schematics can be created using the [Image Factory UI](#ui), or by uploading a YAML document to the Image Factory API:
|
||||
|
||||
```yaml
|
||||
customization:
|
||||
extraKernelArgs: # optional
|
||||
- vga=791
|
||||
  meta: # optional, allows setting initial Talos META
|
||||
- key: 0xa
|
||||
value: "{}"
|
||||
systemExtensions: # optional
|
||||
officialExtensions: # optional
|
||||
- siderolabs/gvisor
|
||||
- siderolabs/amd-ucode
|
||||
overlay: # optional
|
||||
name: rpi_generic
|
||||
image: siderolabs/sbc-raspberry-pi
|
||||
options: # optional, any valid yaml, depends on the overlay implementation
|
||||
data: "mydata"
|
||||
```
|
||||
|
||||
The "vanilla" schematic is:
|
||||
|
||||
```yaml
|
||||
customization:
|
||||
```
|
||||
|
||||
and has an ID of `376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba`.
|
||||
|
||||
The schematic can be applied by uploading it to the Image Factory:
|
||||
|
||||
```shell
|
||||
curl -X POST --data-binary @schematic.yaml https://factory.talos.dev/schematics
|
||||
```
|
||||
|
||||
As the schematic is content-addressable, the same schematic can be uploaded multiple times, and the Image Factory will return the same ID.
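A hypothetical way to capture the returned ID directly, assuming the response is a JSON document containing an `id` field:

```bash
curl -s -X POST --data-binary @schematic.yaml https://factory.talos.dev/schematics | jq -r .id
```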
|
||||
|
||||
## Models
|
||||
|
||||
Models are Talos Linux images with customizations applied.
|
||||
The inputs to generate a model are:
|
||||
|
||||
* schematic ID
|
||||
* Talos Linux version
|
||||
* model type (e.g. ISO, UKI, etc.)
|
||||
* architecture (e.g. amd64, arm64)
|
||||
* various model type specific options (e.g. disk image format, disk image size, etc.)
|
||||
|
||||
## Frontends
|
||||
|
||||
Image Factory provides several frontends to retrieve models:
|
||||
|
||||
* HTTP frontend to download models (e.g. download an ISO or a disk image)
|
||||
* PXE frontend to boot bare-metal machines (PXE script references kernel/initramfs from HTTP frontend)
|
||||
* Registry frontend to fetch customized `installer` images (for initial Talos Linux installation and upgrades)
|
||||
|
||||
The links to different models are available in the [Image Factory UI](#ui), and a full list of possible models is documented at [GitHub](https://github.com/siderolabs/image-factory#readme).
|
||||
|
||||
In this guide we will provide a list of examples:
|
||||
|
||||
* amd64 ISO (for Talos {{< release >}}, "vanilla" schematic) [https://factory.talos.dev/image/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/metal-amd64.iso](https://factory.talos.dev/image/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/metal-amd64.iso)
|
||||
* arm64 AWS image (for Talos {{< release >}}, "vanilla" schematic) [https://factory.talos.dev/image/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/aws-arm64.raw.xz](https://factory.talos.dev/image/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/aws-arm64.raw.xz)
|
||||
* amd64 PXE boot script (for Talos {{< release >}}, "vanilla" schematic) [https://pxe.factory.talos.dev/pxe/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/metal-amd64](https://pxe.factory.talos.dev/pxe/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba/{{< release >}}/metal-amd64)
|
||||
* Talos `installer` image (for Talos {{< release >}}, "vanilla" schematic, architecture is detected automatically): `factory.talos.dev/installer/376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba:{{< release >}}`
|
||||
|
||||
The `installer` image can be used to install Talos Linux on a bare-metal machine, or to upgrade an existing Talos Linux installation.
|
||||
As the Talos version and schematic ID can be changed via the upgrade process, the `installer` image can be used to upgrade to any version of Talos Linux, or to replace the set of installed system extensions.
|
||||
|
||||
## UI
|
||||
|
||||
The Image Factory UI is available at https://factory.talos.dev.
|
||||
The UI provides a way to list the supported Talos Linux versions and the system extensions available for each release, and to generate a schematic based on the selected system extensions.
|
||||
|
||||
The UI operations are equivalent to API operations.
|
||||
|
||||
## Find Schematic ID from Talos Installation
|
||||
|
||||
Image Factory always appends a "virtual" system extension whose version matches the schematic ID used to generate the model.
So, for any running Talos Linux instance, the schematic ID can be found by looking at the list of system extensions:
|
||||
|
||||
```shell
|
||||
$ talosctl get extensions
|
||||
NAMESPACE TYPE ID VERSION NAME VERSION
|
||||
runtime ExtensionStatus 0 1 schematic 376567988ad370138ad8b2698212367b8edcb69b5fd68c80be1f2ec7d603b4ba
|
||||
```
|
||||
|
||||
## Restrictions
|
||||
|
||||
Some models don't include every customization of the schematic:
|
||||
|
||||
* `installer` and `initramfs` images only support system extensions (kernel args and META are ignored)
|
||||
* `kernel` assets don't depend on the schematic
|
||||
|
||||
Other models have full support for all customizations:
|
||||
|
||||
* any disk image format
|
||||
* ISO, PXE boot script
|
||||
|
||||
When installing Talos Linux using ISO/PXE boot, Talos will be installed on the disk using the `installer` image, so the `installer` image in the machine configuration
|
||||
should be using the same schematic as the ISO/PXE boot image.
|
||||
|
||||
Some system extensions are not available for all Talos Linux versions, so an attempt to generate a model with an unsupported system extension will fail.
|
||||
The list of supported Talos versions, and the system extensions supported by each version, is available in the [Image Factory UI](#ui) and [API](https://github.com/siderolabs/image-factory#readme).
|
||||
|
||||
## Under the Hood
|
||||
|
||||
Image Factory is based on the Talos `imager` container which provides both the Talos base boot assets, and the ability to generate custom assets based on a configuration.
|
||||
Image Factory manages a set of `imager` container images to acquire base Talos Linux boot assets (`kernel`, `initramfs`), a set of Talos Linux system extension images, and a set of schematics.
|
||||
When a model is requested, Image Factory uses the `imager` container to generate the requested assets based on the schematic and the Talos Linux version.
|
||||
|
||||
## Security
|
||||
|
||||
Image Factory verifies signatures of all source container images fetched:
|
||||
|
||||
* `imager` container images (base boot assets)
|
||||
* `extensions` system extensions catalogs
|
||||
* `installer` container images (base installer layer)
|
||||
* Talos Linux system extension images
|
||||
|
||||
Internally, Image Factory caches generated boot assets and signs all cached images using a private key.
|
||||
Image Factory verifies the signature of the cached images before serving them to clients.
|
||||
|
||||
Image Factory signs generated `installer` images, and verifies the signature of the `installer` images before serving them to clients.
|
||||
|
||||
Image Factory does not provide a way to list all schematics, as schematics may contain sensitive information (e.g. private kernel boot arguments).
|
||||
As the schematic ID is content-addressable, it is not possible to guess the ID of a schematic without knowing the content of the schematic.
|
||||
|
||||
## Running your own Image Factory
|
||||
|
||||
Image Factory can be deployed on-premises to provide in-house asset generation.
|
||||
|
||||
Image Factory requires the following components:
|
||||
|
||||
* an OCI registry to store schematics (private)
|
||||
* an OCI registry to store cached assets (private)
|
||||
* an OCI registry to store `installer` images (should allow public read-only access)
|
||||
* a container image signing key: ECDSA P-256 private key in PEM format
|
||||
|
||||
Image Factory is configured using command line flags; use `--help` to see the list of available flags.
|
||||
Image Factory should be configured to use proper authentication to push to the OCI registries:
|
||||
|
||||
* by mounting proper credentials via `~/.docker/config.json`
|
||||
* by supplying `GITHUB_TOKEN` (for `ghcr.io`)
|
||||
|
||||
Image Factory performs HTTP redirects to the public registry endpoint for `installer` images, so the public endpoint
|
||||
should be available to Talos Linux machines to pull the `installer` images.
|
||||
website/content/v1.12/learn-more/knowledge-base.md (new file, 98 lines)
@ -0,0 +1,98 @@
|
||||
---
|
||||
title: "Knowledge Base"
|
||||
weight: 1999
|
||||
description: "Recipes for common configuration tasks with Talos Linux."
|
||||
---
|
||||
|
||||
## Disabling `GracefulNodeShutdown` on a node
|
||||
|
||||
Talos Linux enables the [Graceful Node Shutdown](https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown) Kubernetes feature by default.
|
||||
|
||||
To disable this feature, modify the `kubelet` part of the machine configuration with:
|
||||
|
||||
```yaml
|
||||
machine:
|
||||
kubelet:
|
||||
extraArgs:
|
||||
feature-gates: GracefulNodeShutdown=false
|
||||
extraConfig:
|
||||
shutdownGracePeriod: 0s
|
||||
shutdownGracePeriodCriticalPods: 0s
|
||||
```
|
||||
|
||||
## Generating Talos Linux ISO image with custom kernel arguments
|
||||
|
||||
Pass additional kernel arguments using the `--extra-kernel-arg` flag:
|
||||
|
||||
```shell
|
||||
$ docker run --rm -i ghcr.io/siderolabs/imager:{{< release >}} iso --arch amd64 --tar-to-stdout --extra-kernel-arg console=ttyS1 --extra-kernel-arg console=tty0 | tar xz
|
||||
2022/05/25 13:18:47 copying /usr/install/amd64/vmlinuz to /mnt/boot/vmlinuz
|
||||
2022/05/25 13:18:47 copying /usr/install/amd64/initramfs.xz to /mnt/boot/initramfs.xz
|
||||
2022/05/25 13:18:47 creating grub.cfg
|
||||
2022/05/25 13:18:47 creating ISO
|
||||
```
|
||||
|
||||
The ISO will be written to the file `talos-<arch>.iso` in the current directory.
|
||||
|
||||
## Logging Kubernetes audit logs with Loki
|
||||
|
||||
If you are using the loki-stack Helm chart to gather logs from the Kubernetes cluster, you can use the Helm values to configure loki-stack to collect the Kubernetes API server audit logs:
|
||||
|
||||
```yaml
|
||||
promtail:
|
||||
extraArgs:
|
||||
- -config.expand-env
|
||||
# this is required so that the promtail process can read the kube-apiserver audit logs written as `nobody` user
|
||||
containerSecurityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- DAC_READ_SEARCH
|
||||
extraVolumes:
|
||||
- name: audit-logs
|
||||
hostPath:
|
||||
path: /var/log/audit/kube
|
||||
extraVolumeMounts:
|
||||
- name: audit-logs
|
||||
mountPath: /var/log/audit/kube
|
||||
readOnly: true
|
||||
config:
|
||||
snippets:
|
||||
extraScrapeConfigs: |
|
||||
- job_name: auditlogs
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost
|
||||
labels:
|
||||
job: auditlogs
|
||||
host: ${HOSTNAME}
|
||||
__path__: /var/log/audit/kube/*.log
|
||||
```
|
||||
|
||||
## Setting CPU scaling governor
|
||||
|
||||
While it's possible to set the [CPU scaling governor](https://kernelnewbies.org/Linux_5.9#CPU_Frequency_scaling) via `.machine.sysfs`, it's cumbersome to set it for all CPUs individually.
A more elegant approach is to set it via a kernel command line parameter, which also means the option is applied early in the boot process.
|
||||
|
||||
This can be set in the machine configuration via the snippet below:
|
||||
|
||||
```yaml
|
||||
machine:
|
||||
install:
|
||||
extraKernelArgs:
|
||||
- cpufreq.default_governor=performance
|
||||
```
|
||||
|
||||
> Note: Talos needs to be upgraded for the `extraKernelArgs` to take effect.
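After the upgrade, the effective governor can be checked via the Talos API (a sketch; the sysfs path assumes a CPU `cpu0` is present):

```bash
talosctl -n NODE read /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
```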
|
||||
|
||||
## Disable `admissionControl` on control plane nodes
|
||||
|
||||
Talos Linux enables admission control in the API Server by default.
|
||||
|
||||
Although it is not recommended from a security point of view, admission control can be removed by patching your control plane machine configuration:
|
||||
|
||||
```bash
|
||||
talosctl gen config \
|
||||
my-cluster https://mycluster.local:6443 \
|
||||
--config-patch-control-plane '[{"op": "remove", "path": "/cluster/apiServer/admissionControl"}]'
|
||||
```
|
||||
website/content/v1.12/learn-more/kubespan.md (new file, 207 lines)
@ -0,0 +1,207 @@
|
||||
---
|
||||
title: "KubeSpan"
|
||||
weight: 100
|
||||
description: "Understand more about KubeSpan for Talos Linux."
|
||||
---
|
||||
|
||||
## WireGuard Peer Discovery
|
||||
|
||||
The key pieces of information needed for WireGuard generally are:
|
||||
|
||||
- the public key of the host you wish to connect to
|
||||
- an IP address and port of the host you wish to connect to
|
||||
|
||||
The latter is really only required of _one_ side of the pair.
|
||||
Once traffic is received, that information is learned and updated by WireGuard automatically.
|
||||
|
||||
Kubernetes, though, also needs to know which traffic goes to which WireGuard peer.
|
||||
Because this information may be dynamic, we need a way to keep this information up to date.
|
||||
|
||||
If we already have a connection to Kubernetes, it's fairly easy: we can just keep that information in Kubernetes.
|
||||
Otherwise, we have to have some way to discover it.
|
||||
|
||||
Talos Linux implements a multi-tiered approach to gathering this information.
|
||||
Each tier can operate independently, but the amalgamation of the mechanisms produces a more robust set of connection criteria.
|
||||
|
||||
These mechanisms are:
|
||||
|
||||
- an external service
|
||||
- a Kubernetes-based system
|
||||
|
||||
See [discovery service]({{< relref "../talos-guides/discovery" >}}) to learn more about the external service.
|
||||
|
||||
The Kubernetes-based system utilizes annotations on Kubernetes Nodes which describe each node's public key and local addresses.
|
||||
|
||||
On top of this, KubeSpan can optionally route Pod subnets.
|
||||
This is usually taken care of by the CNI, but there are many situations where the CNI is unable to do this itself across networks.
|
||||
|
||||
## NAT, Multiple Routes, Multiple IPs
|
||||
|
||||
One of the difficulties in communicating across networks is that there is often not a single address and port which can identify a connection for each node on the system.
|
||||
For instance, a node sitting on the same network might see its peer as `192.168.2.10`, but a node across the internet may see it as `2001:db8:1ef1::10`.
|
||||
|
||||
We need to be able to handle any number of addresses and ports, and we also need to have a mechanism to _try_ them.
|
||||
WireGuard only allows us to select one at a time.
|
||||
|
||||
KubeSpan implements a controller which continuously discovers and rotates these IP:port pairs until a connection is established.
|
||||
It then starts trying again if that connection ever fails.
|
||||
|
||||
## Packet Routing
|
||||
|
||||
After we have established a WireGuard connection, we have to make sure that the right packets get sent to the WireGuard interface.
|
||||
|
||||
WireGuard supplies a convenient facility for tagging packets which come from _it_, which is great.
|
||||
But in our case, we need to be able to allow traffic which both does _not_ come from WireGuard and _also_ is not destined for another Kubernetes node to flow through the normal mechanisms.
|
||||
|
||||
Unlike many corporate or privacy-oriented VPNs, we need to allow general internet traffic to flow normally.
|
||||
|
||||
Also, as our cluster grows, this set of IP addresses can become quite large and quite dynamic.
|
||||
This would be very cumbersome and slow in `iptables`.
|
||||
Luckily, the kernel supplies a convenient mechanism by which to define this arbitrarily large set of IP addresses: IP sets.
|
||||
|
||||
Talos collects all of the IPs and subnets which are considered "in-cluster" and maintains these in the kernel as an IP set.
|
||||
|
||||
Now that we have the IP set defined, we need to tell the kernel how to use it.
|
||||
|
||||
The traditional way of doing this would be to use `iptables`.
|
||||
However, there is a big problem with IPTables: it is a common namespace in which any number of other pieces of software may dump things.
We have no guarantee that what we add will not be wiped out by something else (from Kubernetes itself, to the CNI, to some workload application), be rendered unusable by higher-priority rules, or just generally cause trouble and conflicts.
|
||||
|
||||
Instead, we use a three-pronged system which is both more foundational and less centralised.
|
||||
|
||||
NFTables offers a separately namespaced, decentralised way of marking packets for later processing based on IP sets.
|
||||
Instead of a common set of well-known tables, NFTables uses hooks into the kernel's netfilter system, which are less vulnerable to being usurped, bypassed, or a source of interference than IPTables, but which are rendered down by the kernel to the same underlying XTables system.
|
||||
|
||||
Our NFTables system is where we store the IP sets.
|
||||
Any packet which enters the system, either by forward from inside Kubernetes or by generation from the host itself, is compared against a hash table of this IP set.
|
||||
If it is matched, it is marked for later processing by our next stage.
|
||||
This is a high-performance system which exists fully in the kernel and which ultimately becomes an eBPF program, so it scales well to hundreds of nodes.
|
||||
|
||||
The next stage is the kernel router's route rules.
|
||||
These are defined as a common ordered list of operations for the whole operating system, but they are intended to be tightly constrained and are rarely used by applications in any case.
|
||||
The rules we add are very simple: if a packet is marked by our NFTables system, send it to an alternate routing table.
|
||||
|
||||
This leads us to our third and final stage of packet routing.
|
||||
We have a custom routing table with two rules:
|
||||
|
||||
- send all IPv4 traffic to the WireGuard interface
|
||||
- send all IPv6 traffic to the WireGuard interface
|
||||
|
||||
So in summary, we:
|
||||
|
||||
- mark packets destined for Kubernetes applications or Kubernetes nodes
|
||||
- send marked packets to a special routing table
|
||||
- send anything which is sent to that routing table through the WireGuard interface
|
||||
|
||||
This gives us an isolated, resilient, tolerant, and non-invasive way to route Kubernetes traffic safely, automatically, and transparently through WireGuard across almost any set of network topologies.
|
||||
|
||||
## Design Decisions
|
||||
|
||||
### Routing
|
||||
|
||||
Routing for Wireguard is a touch complicated when the set of possible peer
|
||||
endpoints includes at least one member of the set of _destinations_.
|
||||
That is, packets from Wireguard to a peer endpoint should not be sent to
|
||||
Wireguard, lest a loop be created.
|
||||
|
||||
In order to handle this situation, Wireguard provides the ability to mark
|
||||
packets which it generates, so their routing can be handled separately.
|
||||
|
||||
In our case, though, we actually want the inverse of this: we want to route
|
||||
Wireguard packets however the normal networking routes and rules say they should
|
||||
be routed, while packets destined for the other side of Wireguard Peers should
|
||||
be forced into Wireguard interfaces.
|
||||
|
||||
While IP Rules allow you to invert matches, they do not support matching based
|
||||
on IP sets.
|
||||
That means, to use simple rules, we would have to add a rule for
|
||||
each destination, which could reach into hundreds or thousands of rules to
|
||||
manage.
|
||||
This is not really much of a performance issue, but it is a management
|
||||
issue, since it is expected that we would not be the only manager of rules in
|
||||
the system, and rules offer no facility to tag for ownership.
|
||||
|
||||
IP Sets are supported by IPTables, and we could integrate there.
|
||||
However, IPTables exists in a global namespace, which makes it fragile having
|
||||
multiple parties manipulating it.
|
||||
The newer NFTables replacement for IPTables, though, allows users to
|
||||
independently hook into various points of XTables, keeping all such rules and
|
||||
sets independent.
|
||||
This means that regardless of what CNIs or other user-side routing rules may do,
|
||||
our KubeSpan setup will not be messed up.
|
||||
|
||||
Therefore, we utilise NFTables (which natively supports IP sets and owner
|
||||
grouping) instead, to mark matching traffic which should be sent to the
|
||||
Wireguard interface.
|
||||
This way, we can keep all our KubeSpan set logic in one place, allowing us to use a single `ip rule` match on our fwmark, sending those matched packets to a separate routing table with one rule: default to the Wireguard interface.
|
||||
|
||||
So we have three components:
|
||||
|
||||
1. A routing table for Wireguard-destined packets
|
||||
2. An NFTables table which defines the set of destinations; packets sent to those destinations will be marked with our firewall mark.
|
||||
- Hook into PreRouting (type Filter)
|
||||
- Hook into Outgoing (type Route)
|
||||
3. One IP Rule which sends packets marked with our firewall mark to our Wireguard
|
||||
routing table.
|
||||
|
||||
### Routing Table
|
||||
|
||||
The routing table (number 180 by default) is simple, containing a single route for each family: send everything through the Wireguard interface.
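Conceptually, the contents of that table look like the following sketch (assuming the WireGuard interface is named `kubespan`):

```text
# ip route show table 180
default dev kubespan scope link
```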
|
||||
|
||||
### NFTables
|
||||
|
||||
The logic inside NFTables is fairly simple.
|
||||
First, everything is compiled into a single table: `talos_kubespan`.
|
||||
|
||||
Next, two chains are set up: one for the `prerouting` hook (`kubespan_prerouting`)
|
||||
and the other for the `outgoing` hook (`kubespan_outgoing`).
|
||||
|
||||
We define two sets of target IP prefixes: one for IPv6 (`kubespan_targets_ipv6`)
|
||||
and the other for IPv4 (`kubespan_targets_ipv4`).
|
||||
|
||||
Last, we add rules to each chain which basically specify:
|
||||
|
||||
1. If the packet is marked as _from_ Wireguard, just accept it and terminate
|
||||
the chain.
|
||||
2. If the packet matches an IP in either of the target IP sets, mark that
|
||||
packet with the _to_ Wireguard mark.
|
||||
|
||||
### Rules
|
||||
|
||||
There are two route rules defined: one to match IPv6 packets and the other to
|
||||
match IPv4 packets.
|
||||
|
||||
These rules say the same thing for each: if the packet is marked that it should
|
||||
go _to_ Wireguard, send it to the Wireguard
|
||||
routing table.
|
||||
|
||||
### Firewall Mark
|
||||
|
||||
KubeSpan is using only two bits of the firewall mark with the mask `0x00000060`.
|
||||
|
||||
> Note: if other software on the node is using the bits `0x60` of the firewall mark, this
|
||||
> might cause conflicts and break KubeSpan.
|
||||
>
|
||||
> At the time of writing, it was confirmed that the Calico CNI uses bits `0xffff0000` and
> the Cilium CNI uses bits `0xf00`, so KubeSpan is compatible with both.
> The Flannel CNI uses the `0x4000` mask, so it is also compatible.
|
||||
|
||||
In the routing rules table, we match on the mark `0x40` with the mask `0x60`:
|
||||
|
||||
```text
|
||||
32500: from all fwmark 0x40/0x60 lookup 180
|
||||
```
|
||||
|
||||
In the NFTables table, we match with the same mask `0x60`, and we set the mark by only modifying bits from the `0x60` mask:
|
||||
|
||||
```text
|
||||
meta mark & 0x00000060 == 0x00000020 accept
|
||||
ip daddr @kubespan_targets_ipv4 meta mark set meta mark & 0xffffffdf | 0x00000040 accept
|
||||
ip6 daddr @kubespan_targets_ipv6 meta mark set meta mark & 0xffffffdf | 0x00000040 accept
|
||||
```
|
||||
website/content/v1.12/learn-more/networking-resources.md (new file, 434 lines)
@ -0,0 +1,434 @@
|
||||
---
|
||||
title: "Networking Resources"
|
||||
weight: 70
|
||||
description: "Delve deeper into networking of Talos Linux."
|
||||
---
|
||||
|
||||
The Talos network configuration subsystem is powered by [COSI]({{< relref "controllers-resources" >}}).
Talos translates network configuration from multiple sources (machine configuration, cloud metadata, automatic network configuration such as DHCP) into COSI resources.
|
||||
|
||||
Network configuration and network state can be inspected using the `talosctl get` command.
|
||||
|
||||
Network machine configuration can be modified using the `talosctl edit mc` command (also the variants `talosctl patch mc` and `talosctl apply-config`) without a reboot.
|
||||
As API access requires a network connection, [`--mode=try`]({{< relref "../talos-guides/configuration/editing-machine-configuration" >}}) can be used to test the configuration with automatic rollback, to avoid losing network access to the node.
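A minimal sketch of such a trial edit (the node address is a placeholder):

```bash
talosctl -n 172.20.0.2 edit machineconfig --mode=try
```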
|
||||
|
||||
## Resources
|
||||
|
||||
There are six basic network configuration items in Talos:
|
||||
|
||||
* `Address` (IP address assigned to the interface/link);
|
||||
* `Route` (route to a destination);
|
||||
* `Link` (network interface/link configuration);
|
||||
* `Resolver` (list of DNS servers);
|
||||
* `Hostname` (node hostname and domainname);
|
||||
* `TimeServer` (list of NTP servers).
|
||||
|
||||
Each network configuration item has two counterparts:
|
||||
|
||||
* `*Status` (e.g. `LinkStatus`) describes the current state of the system (Linux kernel state);
|
||||
* `*Spec` (e.g. `LinkSpec`) defines the desired configuration.
|
||||
|
||||
| Resource | Status | Spec |
|
||||
|--------------------|------------------------|----------------------|
|
||||
| `Address` | `AddressStatus` | `AddressSpec` |
|
||||
| `Route` | `RouteStatus` | `RouteSpec` |
|
||||
| `Link` | `LinkStatus` | `LinkSpec` |
|
||||
| `Resolver` | `ResolverStatus` | `ResolverSpec` |
|
||||
| `Hostname` | `HostnameStatus` | `HostnameSpec` |
|
||||
| `TimeServer` | `TimeServerStatus` | `TimeServerSpec` |
|
||||
|
||||
Status resources have aliases with the `Status` suffix removed, so for example
|
||||
`AddressStatus` is also available as `Address`.
|
||||
|
||||
Talos networking controllers reconcile the state so that `*Status` equals the desired `*Spec`.
|
||||
|
||||
## Observing State
|
||||
|
||||
The current network configuration state can be observed by querying `*Status` resources via
|
||||
`talosctl`:
|
||||
|
||||
```sh
|
||||
$ talosctl get addresses
|
||||
NODE NAMESPACE TYPE ID VERSION ADDRESS LINK
|
||||
172.20.0.2 network AddressStatus eth0/172.20.0.2/24 1 172.20.0.2/24 eth0
|
||||
172.20.0.2 network AddressStatus eth0/fe80::9804:17ff:fe9d:3058/64 2 fe80::9804:17ff:fe9d:3058/64 eth0
|
||||
172.20.0.2 network AddressStatus flannel.1/10.244.4.0/32 1 10.244.4.0/32 flannel.1
|
||||
172.20.0.2 network AddressStatus flannel.1/fe80::10b5:44ff:fe62:6fb8/64 2 fe80::10b5:44ff:fe62:6fb8/64 flannel.1
|
||||
172.20.0.2 network AddressStatus lo/127.0.0.1/8 1 127.0.0.1/8 lo
|
||||
172.20.0.2 network AddressStatus lo/::1/128 1 ::1/128 lo
|
||||
```
|
||||
|
||||
In the output there are addresses set up by Talos (e.g. `eth0/172.20.0.2/24`) and
|
||||
addresses set up by other facilities (e.g. `flannel.1/10.244.4.0/32` set up by CNI).
|
||||
|
||||
Talos networking controllers watch the kernel state and update resources
|
||||
accordingly.
|
||||
|
||||
Additional details about the address can be accessed via the YAML output:
|
||||
|
||||
```yaml
|
||||
# talosctl get address eth0/172.20.0.2/24 -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: network
|
||||
type: AddressStatuses.net.talos.dev
|
||||
id: eth0/172.20.0.2/24
|
||||
version: 1
|
||||
owner: network.AddressStatusController
|
||||
phase: running
|
||||
created: 2021-06-29T20:23:18Z
|
||||
updated: 2021-06-29T20:23:18Z
|
||||
spec:
|
||||
address: 172.20.0.2/24
|
||||
local: 172.20.0.2
|
||||
broadcast: 172.20.0.255
|
||||
linkIndex: 4
|
||||
linkName: eth0
|
||||
family: inet4
|
||||
scope: global
|
||||
flags: permanent
|
||||
```
|
||||
|
||||
Resources can be watched for changes with the `--watch` flag to see how configuration changes over time.
|
||||
|
||||
Other networking status resources can be inspected with `talosctl get routes`, `talosctl get links`, etc.
|
||||
For example:
|
||||
|
||||
```sh
|
||||
$ talosctl get resolvers
|
||||
NODE NAMESPACE TYPE ID VERSION RESOLVERS
|
||||
172.20.0.2 network ResolverStatus resolvers 2 ["8.8.8.8","1.1.1.1"]
|
||||
```
|
||||
|
||||
```yaml
|
||||
# talosctl get links -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: network
|
||||
type: LinkStatuses.net.talos.dev
|
||||
id: eth0
|
||||
version: 2
|
||||
owner: network.LinkStatusController
|
||||
phase: running
|
||||
created: 2021-06-29T20:23:18Z
|
||||
updated: 2021-06-29T20:23:18Z
|
||||
spec:
|
||||
index: 4
|
||||
type: ether
|
||||
linkIndex: 0
|
||||
flags: UP,BROADCAST,RUNNING,MULTICAST,LOWER_UP
|
||||
hardwareAddr: 4e:95:8e:8f:e4:47
|
||||
broadcastAddr: ff:ff:ff:ff:ff:ff
|
||||
mtu: 1500
|
||||
queueDisc: pfifo_fast
|
||||
operationalState: up
|
||||
kind: ""
|
||||
slaveKind: ""
|
||||
driver: virtio_net
|
||||
linkState: true
|
||||
speedMbit: 4294967295
|
||||
port: Other
|
||||
duplex: Unknown
|
||||
```
|
||||
|
||||
## Inspecting Configuration
|
||||
|
||||
The desired networking configuration is combined from multiple sources and presented
|
||||
as `*Spec` resources:
|
||||
|
||||
```sh
|
||||
$ talosctl get addressspecs
|
||||
NODE NAMESPACE TYPE ID VERSION
|
||||
172.20.0.2 network AddressSpec eth0/172.20.0.2/24 2
|
||||
172.20.0.2 network AddressSpec lo/127.0.0.1/8 2
|
||||
172.20.0.2 network AddressSpec lo/::1/128 2
|
||||
```
|
||||
|
||||
These `AddressSpecs` are applied to the Linux kernel to reach the desired state.
|
||||
If, for example, an `AddressSpec` is removed, the address is removed from the Linux network interface as well.
|
||||
|
||||
`*Spec` resources can't be manipulated directly; they are generated automatically by Talos from multiple configuration sources (see the section below for details).
|
||||
|
||||
If a `*Spec` resource is queried in YAML format, some additional information is available:
|
||||
|
||||
```yaml
|
||||
# talosctl get addressspecs eth0/172.20.0.2/24 -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: network
|
||||
type: AddressSpecs.net.talos.dev
|
||||
id: eth0/172.20.0.2/24
|
||||
version: 2
|
||||
owner: network.AddressMergeController
|
||||
phase: running
|
||||
created: 2021-06-29T20:23:18Z
|
||||
updated: 2021-06-29T20:23:18Z
|
||||
finalizers:
|
||||
- network.AddressSpecController
|
||||
spec:
|
||||
address: 172.20.0.2/24
|
||||
linkName: eth0
|
||||
family: inet4
|
||||
scope: global
|
||||
flags: permanent
|
||||
layer: operator
|
||||
```
|
||||
|
||||
An important field is `layer`, which describes the configuration layer this spec comes from: in this case it was generated by a network operator (see below), specifically the DHCPv4 operator.
|
||||
|
||||
## Configuration Merging
|
||||
|
||||
The spec resources described in the previous section show the final merged configuration state, while the initial specs are put into a different, unmerged namespace, `network-config`.
|
||||
Spec resources in the `network-config` namespace are merged with conflict resolution to produce the final merged representation in the `network` namespace.
|
||||
|
||||
Let's take `HostnameSpec` as an example.
|
||||
The final merged representation is:
|
||||
|
||||
```yaml
|
||||
# talosctl get hostnamespec -o yaml
|
||||
node: 172.20.0.2
|
||||
metadata:
|
||||
namespace: network
|
||||
type: HostnameSpecs.net.talos.dev
|
||||
id: hostname
|
||||
version: 2
|
||||
owner: network.HostnameMergeController
|
||||
phase: running
|
||||
created: 2021-06-29T20:23:18Z
|
||||
updated: 2021-06-29T20:23:18Z
|
||||
finalizers:
|
||||
- network.HostnameSpecController
|
||||
spec:
|
||||
hostname: talos-default-controlplane-1
|
||||
domainname: ""
|
||||
layer: operator
|
||||
```
|
||||
|
||||
We can see that the final configuration for the hostname is `talos-default-controlplane-1`, and this is the hostname that was actually applied.
|
||||
This can be verified by querying a `HostnameStatus` resource:
|
||||
|
||||
```sh
|
||||
$ talosctl get hostnamestatus
|
||||
NODE NAMESPACE TYPE ID VERSION HOSTNAME DOMAINNAME
|
||||
172.20.0.2 network HostnameStatus hostname 1 talos-default-controlplane-1
|
||||
```
|
||||
|
||||
The initial configuration for the hostname in the `network-config` namespace is:
|
||||
|
||||
```yaml
# talosctl get hostnamespec -o yaml --namespace network-config
node: 172.20.0.2
metadata:
    namespace: network-config
    type: HostnameSpecs.net.talos.dev
    id: default/hostname
    version: 2
    owner: network.HostnameConfigController
    phase: running
    created: 2021-06-29T20:23:18Z
    updated: 2021-06-29T20:23:18Z
spec:
    hostname: talos-172-20-0-2
    domainname: ""
    layer: default
---
node: 172.20.0.2
metadata:
    namespace: network-config
    type: HostnameSpecs.net.talos.dev
    id: dhcp4/eth0/hostname
    version: 1
    owner: network.OperatorSpecController
    phase: running
    created: 2021-06-29T20:23:18Z
    updated: 2021-06-29T20:23:18Z
spec:
    hostname: talos-default-controlplane-1
    domainname: ""
    layer: operator
```

We can see that there are two specs for the hostname:

* one from the `default` configuration layer, which defines the hostname as `talos-172-20-0-2` (derived from the default node address);
* another one from the `operator` layer, which defines the hostname as `talos-default-controlplane-1` (provided by DHCP).

Talos merges these two specs into a final `HostnameSpec` based on the configuration layer and merge rules.
Here is the order of precedence from low to high:

* `default` (defaults provided by Talos);
* `cmdline` (from the kernel command line);
* `platform` (driven by the cloud provider);
* `operator` (various dynamic configuration options: DHCP, Virtual IP, etc.);
* `configuration` (derived from the machine configuration).

So in our example, the `operator` layer `HostnameSpec` overrides the `default` layer, producing the final hostname `talos-default-controlplane-1`.
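
As a sketch of how this plays out in practice (hypothetical hostname; assumes a node reachable at `172.20.0.2`), setting the hostname in the machine configuration places it in the `configuration` layer, which outranks the DHCP-provided `operator` layer:

```sh
# Hypothetical patch: the `configuration` layer has the highest precedence,
# so this hostname wins over whatever DHCP provides.
talosctl -n 172.20.0.2 patch machineconfig \
  --patch '{"machine": {"network": {"hostname": "web01"}}}'
```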

The merge process applies to all six core networking specs.
For each spec, the `layer` field controls the merge behavior.
If multiple configuration specs
appear at the same layer, they can be merged together if possible; otherwise, the merge result
is stable but not defined (e.g. if DHCP on multiple interfaces provides two different hostnames for the node).

`LinkSpecs` are merged across layers, so, for example, the machine configuration for the interface MTU overrides an MTU set by the DHCP server.
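
A minimal sketch of that MTU override (hypothetical interface name and MTU value):

```sh
# Hypothetical patch: DHCP still assigns the address on eth0, but the MTU
# from the machine configuration layer wins during the merge.
cat > mtu-patch.yaml <<'EOF'
machine:
  network:
    interfaces:
      - interface: eth0
        dhcp: true
        mtu: 9000
EOF
talosctl -n 172.20.0.2 patch machineconfig --patch @mtu-patch.yaml
```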

## Network Operators

Network operators provide dynamic network configuration which can change over time as the node is running:

* DHCPv4
* DHCPv6
* Virtual IP

Network operators produce specs for addresses, routes, links, etc., which are then merged and applied according to the rules described above.

Operators are configured with `OperatorSpec` resources which describe when operators
should run and additional configuration for the operator:

```yaml
# talosctl get operatorspecs -o yaml
node: 172.20.0.2
metadata:
    namespace: network
    type: OperatorSpecs.net.talos.dev
    id: dhcp4/eth0
    version: 1
    owner: network.OperatorConfigController
    phase: running
    created: 2021-06-29T20:23:18Z
    updated: 2021-06-29T20:23:18Z
spec:
    operator: dhcp4
    linkName: eth0
    requireUp: true
    dhcp4:
        routeMetric: 1024
```

`OperatorSpec` resources are generated by Talos, mostly based on the machine configuration.
The DHCP4 operator is created automatically for all physical network links which are not configured explicitly via the kernel command line or the machine configuration.
This also means that on the first boot, without a machine configuration, a DHCP request is made on all physical network interfaces by default.
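
As an illustrative sketch (hypothetical addresses), the Virtual IP operator is enabled from the machine configuration, which in turn produces a corresponding `OperatorSpec`:

```sh
# Hypothetical example: run the Virtual IP (VIP) operator on eth0.
cat > vip-patch.yaml <<'EOF'
machine:
  network:
    interfaces:
      - interface: eth0
        dhcp: true
        vip:
          ip: 172.20.0.100
EOF
talosctl -n 172.20.0.2 patch machineconfig --patch @vip-patch.yaml
```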

Specs generated by operators are prefixed with the operator ID (`dhcp4/eth0` in the example above) in the unmerged `network-config` namespace:

```sh
$ talosctl -n 172.20.0.2 get addressspecs --namespace network-config
NODE         NAMESPACE        TYPE          ID                              VERSION
172.20.0.2   network-config   AddressSpec   dhcp4/eth0/eth0/172.20.0.2/24   1
```

## Other Network Resources

There are some additional resources describing the network subsystem state.

The `NodeAddress` resource presents node addresses excluding link-local and loopback addresses:

```sh
$ talosctl get nodeaddresses
NODE          NAMESPACE   TYPE          ID             VERSION   ADDRESSES
10.100.2.23   network     NodeAddress   accumulative   6         ["10.100.2.23","147.75.98.173","147.75.195.143","192.168.95.64","2604:1380:1:ca00::17"]
10.100.2.23   network     NodeAddress   current        5         ["10.100.2.23","147.75.98.173","192.168.95.64","2604:1380:1:ca00::17"]
10.100.2.23   network     NodeAddress   default        1         ["10.100.2.23"]
```

* `default` is the node default address;
* `current` is the set of addresses a node currently has;
* `accumulative` is the set of addresses a node had over time (it might include virtual IPs which are not owned by the node at the moment).

`NodeAddress` resources are used to pick the default address for the `etcd` peer URL, to populate the SANs field in the generated certificates, etc.

Another important resource is `Nodename`, which provides the `Node` name in Kubernetes:

```sh
$ talosctl get nodename
NODE          NAMESPACE      TYPE       ID         VERSION   NODENAME
10.100.2.23   controlplane   Nodename   nodename   1         infra-green-cp-mmf7v
```

Depending on the machine configuration, `nodename` might be just a hostname or the FQDN of the node.

`NetworkStatus` aggregates the current state of the network configuration:

```yaml
# talosctl get networkstatus -o yaml
node: 10.100.2.23
metadata:
    namespace: network
    type: NetworkStatuses.net.talos.dev
    id: status
    version: 5
    owner: network.StatusController
    phase: running
    created: 2021-06-24T18:56:00Z
    updated: 2021-06-24T18:56:02Z
spec:
    addressReady: true
    connectivityReady: true
    hostnameReady: true
    etcFilesReady: true
```

## Network Controllers

For each of the six basic resource types, there are several controllers:

* `*StatusController` populates `*Status` resources observing the Linux kernel state.
* `*ConfigController` produces the initial unmerged `*Spec` resources in the `network-config` namespace based on defaults, kernel command line, and machine configuration.
* `*MergeController` merges `*Spec` resources into the final representation in the `network` namespace.
* `*SpecController` applies merged `*Spec` resources to the kernel state.

For the network operators:

* `OperatorConfigController` produces `OperatorSpec` resources based on the machine configuration and defaults.
* `OperatorSpecController` runs network operators watching `OperatorSpec` resources and producing various `*Spec` resources in the `network-config` namespace.
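
This pipeline can be observed live; a sketch (assuming a node at `172.20.0.2`) that watches specs flow through both namespaces:

```sh
# Watch unmerged specs as the config controllers and operators produce them:
talosctl -n 172.20.0.2 get addressspecs --namespace network-config --watch
# Watch the merged result maintained by the merge controller:
talosctl -n 172.20.0.2 get addressspecs --watch
```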

## Configuration Sources

There are several configuration sources for the network configuration, which are described in this section.

### Defaults

* `lo` interface is assigned addresses `127.0.0.1/8` and `::1/128`;
* hostname is set to `talos-<IP>` where `IP` is the default node address;
* resolvers are set to `8.8.8.8`, `1.1.1.1`;
* time servers are set to `pool.ntp.org`;
* DHCP4 operator is run on any physical interface which is not configured explicitly.
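
The effective values, whether defaults or overrides, can be inspected at runtime; a sketch assuming a node at `172.20.0.2`:

```sh
# Show the resolvers and time servers currently in effect:
talosctl -n 172.20.0.2 get resolvers
talosctl -n 172.20.0.2 get timeservers
```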

### Cmdline

The kernel [command line]({{< relref "../reference/kernel" >}}) is parsed for the following options:

* `ip=` option is parsed for the node IP, default gateway, hostname, DNS servers, and NTP servers;
* `bond=` option is parsed for bonding interfaces and their options;
* `talos.hostname=` option is used to set the node hostname;
* `talos.network.interface.ignore=` can be used to make Talos skip network interface configuration completely.
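
To verify what a node actually booted with, the kernel command line can be read back from the running system (a sketch, assuming API access to the node):

```sh
# Print the kernel command line of the running node:
talosctl -n 172.20.0.2 read /proc/cmdline
```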

### Platform

Platform configuration delivers cloud environment-specific options (e.g. the hostname).

Platform configuration is specific to the environment metadata: for example, on Equinix Metal, Talos automatically
configures public and private IPs, routing, link bonding, and the hostname.

Platform configuration is cached across reboots in `/system/state/platform-network.yaml`.

### Operator

Network operators provide configuration for all basic resource types.

### Machine Configuration

The machine configuration is parsed for link configuration, addresses, routes, hostname,
resolvers, and time servers.
Any changes to the `.machine.network` configuration can be applied in immediate mode (i.e. without a reboot).
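
A sketch of applying such a change (assuming `controlplane.yaml` is the node's machine configuration file):

```sh
# Interactively edit the live machine configuration:
talosctl -n 172.20.0.2 edit machineconfig
# Or apply an updated file without a reboot:
talosctl -n 172.20.0.2 apply-config --file controlplane.yaml --mode no-reboot
```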

## Network Configuration Debugging

Most of the network controller operations and failures are logged to the kernel console;
additional logs with `debug` level are available with the `talosctl logs controller-runtime` command.
If the network configuration can't be established and the API is not available, `debug` level
logs can be sent to the console with the `debug: true` option in the machine configuration.
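
For example (a sketch, assuming the API is reachable):

```sh
# Follow the controller runtime logs, which include network controller activity:
talosctl -n 172.20.0.2 logs controller-runtime --follow
```
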
110
website/content/v1.12/learn-more/philosophy.md
Normal file
@ -0,0 +1,110 @@
---
title: Philosophy
weight: 10
description: "Learn about the philosophy behind the need for Talos Linux."
---

## Distributed

Talos is intended to be operated in a distributed manner: it is built for a high-availability dataplane _first_.
Its `etcd` cluster is built in an ad-hoc manner, with each appointed node joining on its own directive (with proper security validations enforced, of course).
Like Kubernetes, workloads are intended to be distributed across any number of compute nodes.

There should be no single points of failure, and the level of required coordination is as low as each platform allows.

## Immutable

Talos takes immutability very seriously.
Talos itself, even when installed on a disk, always runs from a SquashFS image, meaning that even if a directory is mounted to be writable, the image itself is never modified.
All images are signed and delivered as single, versioned files.
We can always run integrity checks on our image to verify that it has not been modified.

While Talos does allow a few, highly-controlled write points to the filesystem, we strive to make them as non-unique and non-critical as possible.
We call the writable partition the "ephemeral" partition precisely because we want to make sure none of us ever uses it for unique, non-replicated, non-recreatable data.
Thus, if all else fails, we can always wipe the disk and get back up and running.

## Minimal

We are always trying to reduce Talos' footprint.
Because nearly the entire OS is built from scratch in Go, we are
in a good position.
We have no shell.
We have no SSH.
We have none of the GNU utilities, not even a rollup tool such as busybox.
Everything in Talos is there because it is necessary, and
nothing is included which isn't.

As a result, the OS right now produces a SquashFS image size of less than **80 MB**.

## Ephemeral

Everything Talos writes to its disk is either replicated or reconstructable.
Since the controlplane is highly available, the loss of any node will cause
neither service disruption nor loss of data.
No writes are even allowed to the vast majority of the filesystem.
We even call the writable partition "ephemeral" to keep this idea always in
focus.

## Secure

Talos has always been designed with security in mind.
With its immutability, its minimalism, its signing, and its componenture, we are
able to simply bypass huge classes of vulnerabilities.
Moreover, because of the way we have designed Talos, we are able to take
advantage of a number of additional settings, such as the recommendations of the Kernel Self Protection Project (KSPP) and completely disabling dynamic modules.

There are no passwords in Talos.
All networked communication is encrypted and key-authenticated.
The Talos certificates are short-lived and automatically-rotating.
Kubernetes is always constructed with its own separate PKI structure which is
enforced.

## Declarative

Everything which can be configured in Talos is done through a single YAML
manifest.
There is no scripting and no procedural steps.
Everything is defined by the one declarative YAML file.
This configuration includes that of both Talos itself and the Kubernetes which
it forms.

This is achievable because Talos is tightly focused to do one thing: run
Kubernetes, in the easiest, most secure, most reliable way it can.

## Not based on X distro

Talos Linux _isn't_ based on any other distribution.
We think of ourselves as being the second generation of
container-optimised operating systems, where things like CoreOS, Flatcar, and Rancher represent the first generation (but the technology is not derived from any of those).

Talos Linux is actually a ground-up rewrite of the userspace, from PID 1.
We run the Linux kernel, but everything downstream of that is our own custom
code, written in Go, rigorously-tested, and published as an immutable,
integrated image.
The Linux kernel launches what we call `machined`, for instance, not `systemd`.
There is no `systemd` on our system.
There are no GNU utilities, no shell, no SSH, no packages, nothing you could associate with
any other distribution.

## An Operating System designed for Kubernetes

Technically, Talos Linux installs to a computer like any other operating system.
_Unlike_ other operating systems, Talos is not meant to run alone, on a
single machine.
A design goal of Talos Linux is eliminating the management
of individual nodes as much as possible.
In order to do that, Talos Linux operates as a cluster of machines, with lots of
checking and coordination between them, at all levels.

There is only a cluster.
Talos is meant to do one thing: maintain a Kubernetes cluster, and it does this
very, very well.

The entirety of the configuration of any machine is specified by a single
configuration file, which can often be the _same_ configuration file used
across _many_ machines.
Much like a biological system, if some component misbehaves, just cut it out and
let a replacement grow.
Rebuilds of Talos are remarkably fast, whether they be new machines, upgrades,
or reinstalls.
Never get hung up on an individual machine.
23
website/content/v1.12/learn-more/process-capabilities.md
Normal file
@ -0,0 +1,23 @@
---
title: "Process Capabilities"
weight: 105
description: "Understand the Linux process capabilities restrictions with Talos Linux."
---

Linux defines a set of [process capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html) that can be used to fine-tune the process permissions.

Talos Linux, for security reasons, restricts any process from gaining the following capabilities:

* `CAP_SYS_MODULE` (loading kernel modules)
* `CAP_SYS_BOOT` (rebooting the system)

This means that any process, including privileged Kubernetes pods, will not be able to gain these capabilities.

If you see the following error on starting a pod, make sure it doesn't have any of the capabilities listed above in the spec:

```text
Error: failed to create containerd task: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: unable to apply caps: operation not permitted: unknown
```
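
As an illustrative sketch (hypothetical pod name and image), a pod that requests one of the restricted capabilities fails with the error above:

```sh
# Hypothetical example: requesting CAP_SYS_MODULE is rejected on Talos nodes.
kubectl run captest --image=alpine --restart=Never \
  --overrides='{"spec":{"containers":[{"name":"captest","image":"alpine","command":["sleep","3600"],"securityContext":{"capabilities":{"add":["SYS_MODULE"]}}}]}}'
```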

> Note: even with the `CAP_SYS_MODULE` capability, Linux kernel module loading is restricted by requiring a valid signature.
> Talos Linux creates a throwaway signing key during the kernel build, so it's not possible to build/sign a kernel module for Talos Linux outside of the build process.
@ -0,0 +1,74 @@
---
title: "Network Connectivity"
weight: 80
description: "Description of the Networking Connectivity needed by Talos Linux"
aliases:
  - ../guides/configuring-network-connectivity
---

## Configuring Network Connectivity

The simplest way to deploy Talos is by ensuring that all the remote components of the system (`talosctl`, the control plane nodes, and worker nodes) all have layer 2 connectivity.
This is not always possible, however, so this page lays out the minimal network access that is required to configure and operate a Talos cluster.

> Note: These are the ports required for Talos specifically, and should be configured _in addition_ to the ports required by Kubernetes.
> See the [Kubernetes docs](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/#check-required-ports) for information on the ports used by Kubernetes itself.

### Control plane node(s)

<table class="table-auto">
  <thead>
    <tr>
      <th class="px-4 py-2">Protocol</th>
      <th class="px-4 py-2">Direction</th>
      <th class="px-4 py-2">Port Range</th>
      <th class="px-4 py-2">Purpose</th>
      <th class="px-4 py-2">Used By</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td class="border px-4 py-2">TCP</td>
      <td class="border px-4 py-2">Inbound</td>
      <td class="border px-4 py-2">50000*</td>
      <td class="border px-4 py-2"><a href="../../learn-more/components/#apid">apid</a></td>
      <td class="border px-4 py-2">talosctl, control plane nodes</td>
    </tr>
    <tr>
      <td class="border px-4 py-2">TCP</td>
      <td class="border px-4 py-2">Inbound</td>
      <td class="border px-4 py-2">50001*</td>
      <td class="border px-4 py-2"><a href="../../learn-more/components/#trustd">trustd</a></td>
      <td class="border px-4 py-2">Worker nodes</td>
    </tr>
  </tbody>
</table>

> Ports marked with a `*` are not currently configurable, but that may change in the future.
> [Follow along here](https://github.com/siderolabs/talos/issues/1836).

### Worker node(s)

<table class="table-auto">
  <thead>
    <tr>
      <th class="px-4 py-2">Protocol</th>
      <th class="px-4 py-2">Direction</th>
      <th class="px-4 py-2">Port Range</th>
      <th class="px-4 py-2">Purpose</th>
      <th class="px-4 py-2">Used By</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td class="border px-4 py-2">TCP</td>
      <td class="border px-4 py-2">Inbound</td>
      <td class="border px-4 py-2">50000*</td>
      <td class="border px-4 py-2"><a href="../../learn-more/components/#apid">apid</a></td>
      <td class="border px-4 py-2">Control plane nodes</td>
    </tr>
  </tbody>
</table>

> Ports marked with a `*` are not currently configurable, but that may change in the future.
> [Follow along here](https://github.com/siderolabs/talos/issues/1836).
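
A quick reachability sanity check of these ports from a workstation (a sketch; assumes a netcat binary and a node at `172.20.0.2`):

```sh
# Check that the Talos API (apid) port answers on a node:
nc -zv 172.20.0.2 50000
```
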
62
website/content/v1.12/learn-more/talosctl.md
Normal file
@ -0,0 +1,62 @@
---
title: "talosctl"
weight: 110
description: "The design and use of the Talos Linux control application."
---

The `talosctl` tool acts as a reference implementation for the Talos API, but it also handles a lot of
conveniences for the use of Talos and its clusters.

### Video Walkthrough

To see some live examples of talosctl usage, view the following video:

<iframe width="560" height="315" src="https://www.youtube.com/embed/pl0l_K_3Y6o" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

## Client Configuration

Talosctl configuration is located in `$XDG_CONFIG_HOME/talos/config.yaml` if `$XDG_CONFIG_HOME` is defined.
Otherwise it is in `$HOME/.talos/config`.
The location can always be overridden by the `TALOSCONFIG` environment variable or the `--talosconfig` parameter.

Like `kubectl`, `talosctl` uses the concept of configuration contexts, so any number of Talos clusters can be managed with a single configuration file.
It also comes with some intelligent tooling to manage the merging of new contexts into the config.
The default operation is a non-destructive merge, where if a context of the same name already exists in the file, the context to be added is renamed by appending an index number.
You can easily overwrite instead, as well.
See `talosctl config help` for more information.
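
A short sketch of the typical context workflow (hypothetical context name and file path):

```sh
# Merge a freshly generated talosconfig into the default client configuration:
talosctl config merge ./talosconfig
# List the known contexts and switch to one:
talosctl config contexts
talosctl config context my-cluster
```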

## Endpoints and Nodes

`endpoints` are the communication endpoints to which the client directly talks.
These can be load balancers, DNS hostnames, a list of IPs, etc.
If multiple endpoints are specified, the client will automatically load
balance and fail over between them.
It is recommended that these point to the set of control plane nodes, either directly or through a load balancer.

Each endpoint will automatically proxy requests destined to another node through it, so it is not necessary to change the endpoint configuration just because you wish to talk to a different node within the cluster.

Endpoints _do_, however, need to be members of the same Talos cluster as the target node, because these proxied connections rely on certificate-based authentication.

The `node` is the target node on which you wish to perform the API call.
While you can configure the target node (or even set of target nodes) inside the `talosctl` configuration file, it is recommended not to do so, but to explicitly declare the target node(s) using the `-n` or `--nodes` command-line parameter.

> When specifying nodes, their IPs and/or hostnames are as seen by the endpoint servers, not as from the client.
> This is because all connections are proxied first through the endpoints.
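
For example (a sketch with hypothetical addresses), an API call can be directed at two worker nodes through a control plane endpoint:

```sh
# Proxy a call to two target nodes via a control plane endpoint:
talosctl --endpoints 172.20.0.2 --nodes 172.20.0.3,172.20.0.4 version
```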

## Kubeconfig

The configuration for accessing a Talos Kubernetes cluster is obtained with `talosctl`.
By default, `talosctl` will safely merge the cluster into the default kubeconfig.
Like `talosctl` itself, in the event of a naming conflict, the new context name will be index-appended before insertion.
The `--force` option can be used to overwrite instead.

You can also specify an alternate path by supplying it as a positional parameter.
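
A sketch (hypothetical node address and output path):

```sh
# Merge cluster credentials into the default kubeconfig:
talosctl -n 172.20.0.2 kubeconfig
# Or write to a specific file, overwriting on conflict:
talosctl -n 172.20.0.2 kubeconfig ./kubeconfig --force
```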

Thus, like Talos clusters themselves, `talosctl` makes it easy to manage any
number of Kubernetes clusters from the same workstation.

## Commands

Please see the [CLI reference]({{< relref "../reference/cli" >}}) for the entire list of commands which are available from `talosctl`.
4
website/content/v1.12/reference/_index.md
Normal file
@ -0,0 +1,4 @@
---
title: "Reference"
weight: 70
---
9707
website/content/v1.12/reference/api.md
Normal file
File diff suppressed because it is too large
3011
website/content/v1.12/reference/cli.md
Normal file
File diff suppressed because it is too large
28
website/content/v1.12/reference/configuration/_index.md
Normal file
@ -0,0 +1,28 @@
---
title: Configuration
description: Talos Linux machine configuration reference.
---

A Talos Linux machine is fully configured via a single YAML file called the *machine configuration*.

The file might contain one or more configuration documents separated by `---` (three dashes) lines.
At the moment, the majority of the configuration options are within the [v1alpha1]({{< relref "./v1alpha1" >}}) document, so
this is the only mandatory document in the configuration file.

Configuration documents might be named (contain a `name:` field) or unnamed.
Unnamed documents can be supplied to the machine configuration file only once, while named documents can be supplied multiple times with unique names.

The `v1alpha1` document has its own (legacy) structure, while every other document has the following set of fields:

```yaml
apiVersion: v1alpha1 # version of the document
kind: NetworkRuleConfig # type of document
name: rule1 # only for named documents
```
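
As a sketch, a configuration file combining the mandatory `v1alpha1` document with a named document might look like this (the document bodies are hypothetical placeholders with most fields elided):

```sh
# Sketch: a machine configuration file with two documents separated by `---`.
cat > machineconfig.yaml <<'EOF'
version: v1alpha1    # the legacy-structured, unnamed v1alpha1 document
machine: {}          # ... machine options elided ...
cluster: {}          # ... cluster options elided ...
---
apiVersion: v1alpha1 # any other document carries these three fields
kind: NetworkRuleConfig
name: rule1          # named documents may appear multiple times with unique names
EOF
```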

This section contains the configuration reference; to learn more about Talos Linux machine configuration management, please see:

* [quick guide to configuration generation]({{< relref "../../introduction/getting-started#configure-talos-linux" >}})
* [configuration management in production]({{< relref "../../introduction/prodnotes#configure-talos" >}})
* [configuration patches]({{< relref "../../talos-guides/configuration/patching" >}})
* [editing live machine configuration]({{< relref "../../talos-guides/configuration/editing-machine-configuration" >}})
@ -0,0 +1,8 @@
---
description: |
    Package block provides block device and volume configuration documents.
title: block
---

<!-- markdownlint-disable -->

@ -0,0 +1,94 @@
---
description: |
    ExistingVolumeConfig is an existing volume configuration document.
    Existing volumes allow mounting partitions (or whole disks) that were created
    outside of Talos. The volume will be mounted under `/var/mnt/<name>`.
    The existing volume config name should not conflict with user volume names.
title: ExistingVolumeConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: ExistingVolumeConfig
name: my-existing-volume # Name of the volume.
# The discovery describes how to find a volume.
discovery:
    # The volume selector expression.
    volumeSelector:
        match: volume.partition_label == "MY-DATA" # The Common Expression Language (CEL) expression to match the volume.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the volume.<br><br>Name can only contain:<br>lowercase and uppercase ASCII letters, digits, and hyphens. | |
|`discovery` |<a href="#ExistingVolumeConfig.discovery">VolumeDiscoverySpec</a> |The discovery describes how to find a volume. | |
|`mount` |<a href="#ExistingVolumeConfig.mount">MountSpec</a> |The mount describes additional mount options. | |

## discovery {#ExistingVolumeConfig.discovery}

VolumeDiscoverySpec describes how the volume is discovered.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`volumeSelector` |<a href="#ExistingVolumeConfig.discovery.volumeSelector">VolumeSelector</a> |The volume selector expression. | |

### volumeSelector {#ExistingVolumeConfig.discovery.volumeSelector}

VolumeSelector selects an existing volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`match` |Expression |The Common Expression Language (CEL) expression to match the volume. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
match: volume.partition_label == "MY-DATA"
{{< /highlight >}}{{< highlight yaml >}}
match: volume.name == "xfs" && disk.serial == "SERIAL123"
{{< /highlight >}}</details> | |

## mount {#ExistingVolumeConfig.mount}

MountSpec describes how the volume is mounted.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`readOnly` |bool |Mount the volume read-only. | |
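
As a usage sketch (hypothetical node address; the document matches the example above, saved as `existing-volume.yaml`), the document can be added to the machine configuration as a patch and the resulting volume observed:

```sh
# Hypothetical: append the ExistingVolumeConfig document shown above,
# then inspect the discovered volumes.
talosctl -n 172.20.0.2 patch machineconfig --patch @existing-volume.yaml
talosctl -n 172.20.0.2 get volumestatuses
```
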
@ -0,0 +1,257 @@
---
description: |
    RawVolumeConfig is a raw volume configuration document.
    Raw volumes allow creating partitions without formatting them.
    If you want to use local storage, user volumes are a better choice;
    raw volumes are intended to be used with CSI provisioners.
    The partition label is automatically generated as `r-<name>`.
title: RawVolumeConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: RawVolumeConfig
name: ceph-data # Name of the volume.
# The provisioning describes how the volume is provisioned.
provisioning:
    # The disk selector expression.
    diskSelector:
        match: disk.transport == "nvme" # The Common Expression Language (CEL) expression to match the disk.
    maxSize: 50GiB # The maximum size of the volume, if not specified the volume can grow to the size of the disk.

    # # The minimum size of the volume.
    # minSize: 2.5GiB

# # The encryption describes how the volume is encrypted.
# encryption:
#     provider: luks2 # Encryption provider to use for the encryption.
#     # Defines the encryption keys generation and storage method.
#     keys:
#         - slot: 0 # Key slot number for LUKS2 encryption.
#           # Key which value is stored in the configuration file.
#           static:
#               passphrase: exampleKey # Defines the static passphrase value.
#
#           # # KMS managed encryption key.
#           # kms:
#           #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
#         - slot: 1 # Key slot number for LUKS2 encryption.
#           # KMS managed encryption key.
#           kms:
#               endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
#     cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
#     blockSize: 4096 # Defines the encryption sector size.
#     # Additional --perf parameters for the LUKS2 encryption.
#     options:
#         - no_read_workqueue
#         - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the volume.<br><br>Name might be between 1 and 34 characters long and can only contain:<br>lowercase and uppercase ASCII letters, digits, and hyphens. | |
|`provisioning` |<a href="#RawVolumeConfig.provisioning">ProvisioningSpec</a> |The provisioning describes how the volume is provisioned. | |
|`encryption` |<a href="#RawVolumeConfig.encryption">EncryptionSpec</a> |The encryption describes how the volume is encrypted. | |

## provisioning {#RawVolumeConfig.provisioning}

ProvisioningSpec describes how the volume is provisioned.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`diskSelector` |<a href="#RawVolumeConfig.provisioning.diskSelector">DiskSelector</a> |The disk selector expression. | |
|`grow` |bool |Should the volume grow to the size of the disk (if possible). | |
|`minSize` |ByteSize |The minimum size of the volume.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
minSize: 2.5GiB
{{< /highlight >}}</details> | |
|`maxSize` |ByteSize |The maximum size of the volume, if not specified the volume can grow to the size of the<br>disk.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
maxSize: 50GiB
{{< /highlight >}}</details> | |

### diskSelector {#RawVolumeConfig.provisioning.diskSelector}

DiskSelector selects a disk for the volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`match` |Expression |The Common Expression Language (CEL) expression to match the disk. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
match: disk.size > 120u * GB && disk.size < 1u * TB
{{< /highlight >}}{{< highlight yaml >}}
match: disk.transport == "sata" && !disk.rotational && !system_disk
{{< /highlight >}}</details> | |

## encryption {#RawVolumeConfig.encryption}

EncryptionSpec represents volume encryption settings.

{{< highlight yaml >}}
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key which value is stored in the configuration file.
          static:
              passphrase: exampleKey # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
        - slot: 1 # Key slot number for LUKS2 encryption.
          # KMS managed encryption key.
          kms:
              endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
    cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
    blockSize: 4096 # Defines the encryption sector size.

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`provider` |EncryptionProviderType |Encryption provider to use for the encryption. |`luks2`<br /> |
|`keys` |<a href="#RawVolumeConfig.encryption.keys.">[]EncryptionKey</a> |Defines the encryption keys generation and storage method. | |
|`cipher` |string |Cipher to use for the encryption. Depends on the encryption provider. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
cipher: aes-xts-plain64
{{< /highlight >}}</details> |`aes-xts-plain64`<br />`xchacha12,aes-adiantum-plain64`<br />`xchacha20,aes-adiantum-plain64`<br /> |
|`keySize` |uint |Defines the encryption key length. | |
|`blockSize` |uint64 |Defines the encryption sector size. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
blockSize: 4096
{{< /highlight >}}</details> | |
|`options` |[]string |Additional --perf parameters for the LUKS2 encryption. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
options:
    - no_read_workqueue
    - no_write_workqueue
{{< /highlight >}}</details> |`no_read_workqueue`<br />`no_write_workqueue`<br />`same_cpu_crypt`<br /> |

### keys[] {#RawVolumeConfig.encryption.keys.}

EncryptionKey represents configuration for disk encryption key.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`slot` |int |Key slot number for LUKS2 encryption. | |
|`static` |<a href="#RawVolumeConfig.encryption.keys..static">EncryptionKeyStatic</a> |Key which value is stored in the configuration file. | |
|`nodeID` |<a href="#RawVolumeConfig.encryption.keys..nodeID">EncryptionKeyNodeID</a> |Deterministically generated key from the node UUID and PartitionLabel. | |
|`kms` |<a href="#RawVolumeConfig.encryption.keys..kms">EncryptionKeyKMS</a> |KMS managed encryption key. | |
|`tpm` |<a href="#RawVolumeConfig.encryption.keys..tpm">EncryptionKeyTPM</a> |Enable TPM based disk encryption. | |
|`lockToState` |bool |Lock the disk encryption key to the random salt stored in the STATE partition. This is useful to prevent the volume from being unlocked if STATE partition is compromised or replaced. It is recommended to use this option with TPM disk encryption for non-STATE volumes. | |

#### static {#RawVolumeConfig.encryption.keys..static}

EncryptionKeyStatic represents throw away key type.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`passphrase` |string |Defines the static passphrase value. | |

#### nodeID {#RawVolumeConfig.encryption.keys..nodeID}

EncryptionKeyNodeID represents deterministically generated key from the node UUID and PartitionLabel.

#### kms {#RawVolumeConfig.encryption.keys..kms}

EncryptionKeyKMS represents a key that is generated and then sealed/unsealed by the KMS server.

{{< highlight yaml >}}
encryption:
    keys:
        - kms:
              endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`endpoint` |string |KMS endpoint to Seal/Unseal the key. | |

#### tpm {#RawVolumeConfig.encryption.keys..tpm}

EncryptionKeyTPM represents a key that is generated and then sealed/unsealed by the TPM.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`checkSecurebootStatusOnEnroll` |bool |Check that Secureboot is enabled in the EFI firmware.<br>If Secureboot is not enabled, the enrollment of the key will fail. As the TPM key is anyways bound to the value of PCR 7, changing Secureboot status or configuration after the initial enrollment will make the key unusable. | |
@ -0,0 +1,254 @@
---
description: |
    SwapVolumeConfig is a disk swap volume configuration document.
    Swap volume is automatically allocated as a partition on the specified disk
    and activated as swap; removing a swap volume deactivates swap.
    The partition label is automatically generated as `s-<name>`.
title: SwapVolumeConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: SwapVolumeConfig
name: swap1 # Name of the volume.
# The provisioning describes how the volume is provisioned.
provisioning:
    # The disk selector expression.
    diskSelector:
        match: disk.transport == "nvme" # The Common Expression Language (CEL) expression to match the disk.
    minSize: 3GiB # The minimum size of the volume.
    maxSize: 4GiB # The maximum size of the volume, if not specified the volume can grow to the size of the disk.
# The encryption describes how the volume is encrypted.
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key which value is stored in the configuration file.
          static:
              passphrase: swapsecret # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.

    # # Cipher to use for the encryption. Depends on the encryption provider.
    # cipher: aes-xts-plain64

    # # Defines the encryption sector size.
    # blockSize: 4096

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the volume.<br><br>Name might be between 1 and 34 characters long and can only contain:<br>lowercase and uppercase ASCII letters, digits, and hyphens. | |
|`provisioning` |<a href="#SwapVolumeConfig.provisioning">ProvisioningSpec</a> |The provisioning describes how the volume is provisioned. | |
|`encryption` |<a href="#SwapVolumeConfig.encryption">EncryptionSpec</a> |The encryption describes how the volume is encrypted. | |

## provisioning {#SwapVolumeConfig.provisioning}

ProvisioningSpec describes how the volume is provisioned.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`diskSelector` |<a href="#SwapVolumeConfig.provisioning.diskSelector">DiskSelector</a> |The disk selector expression. | |
|`grow` |bool |Should the volume grow to the size of the disk (if possible). | |
|`minSize` |ByteSize |The minimum size of the volume.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
minSize: 2.5GiB
{{< /highlight >}}</details> | |
|`maxSize` |ByteSize |The maximum size of the volume, if not specified the volume can grow to the size of the<br>disk.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
maxSize: 50GiB
{{< /highlight >}}</details> | |

### diskSelector {#SwapVolumeConfig.provisioning.diskSelector}

DiskSelector selects a disk for the volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`match` |Expression |The Common Expression Language (CEL) expression to match the disk. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
match: disk.size > 120u * GB && disk.size < 1u * TB
{{< /highlight >}}{{< highlight yaml >}}
match: disk.transport == "sata" && !disk.rotational && !system_disk
{{< /highlight >}}</details> | |

## encryption {#SwapVolumeConfig.encryption}

EncryptionSpec represents volume encryption settings.

{{< highlight yaml >}}
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key which value is stored in the configuration file.
          static:
              passphrase: exampleKey # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
        - slot: 1 # Key slot number for LUKS2 encryption.
          # KMS managed encryption key.
          kms:
              endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
    cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
    blockSize: 4096 # Defines the encryption sector size.

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`provider` |EncryptionProviderType |Encryption provider to use for the encryption. |`luks2`<br /> |
|`keys` |<a href="#SwapVolumeConfig.encryption.keys.">[]EncryptionKey</a> |Defines the encryption keys generation and storage method. | |
|`cipher` |string |Cipher to use for the encryption. Depends on the encryption provider. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
cipher: aes-xts-plain64
{{< /highlight >}}</details> |`aes-xts-plain64`<br />`xchacha12,aes-adiantum-plain64`<br />`xchacha20,aes-adiantum-plain64`<br /> |
|`keySize` |uint |Defines the encryption key length. | |
|`blockSize` |uint64 |Defines the encryption sector size. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
blockSize: 4096
{{< /highlight >}}</details> | |
|`options` |[]string |Additional --perf parameters for the LUKS2 encryption. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
options:
    - no_read_workqueue
    - no_write_workqueue
{{< /highlight >}}</details> |`no_read_workqueue`<br />`no_write_workqueue`<br />`same_cpu_crypt`<br /> |

### keys[] {#SwapVolumeConfig.encryption.keys.}

EncryptionKey represents configuration for disk encryption key.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`slot` |int |Key slot number for LUKS2 encryption. | |
|`static` |<a href="#SwapVolumeConfig.encryption.keys..static">EncryptionKeyStatic</a> |Key which value is stored in the configuration file. | |
|`nodeID` |<a href="#SwapVolumeConfig.encryption.keys..nodeID">EncryptionKeyNodeID</a> |Deterministically generated key from the node UUID and PartitionLabel. | |
|`kms` |<a href="#SwapVolumeConfig.encryption.keys..kms">EncryptionKeyKMS</a> |KMS managed encryption key. | |
|`tpm` |<a href="#SwapVolumeConfig.encryption.keys..tpm">EncryptionKeyTPM</a> |Enable TPM based disk encryption. | |
|`lockToState` |bool |Lock the disk encryption key to the random salt stored in the STATE partition. This is useful to prevent the volume from being unlocked if STATE partition is compromised or replaced. It is recommended to use this option with TPM disk encryption for non-STATE volumes. | |

#### static {#SwapVolumeConfig.encryption.keys..static}

EncryptionKeyStatic represents throw away key type.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`passphrase` |string |Defines the static passphrase value. | |

#### nodeID {#SwapVolumeConfig.encryption.keys..nodeID}

EncryptionKeyNodeID represents deterministically generated key from the node UUID and PartitionLabel.

#### kms {#SwapVolumeConfig.encryption.keys..kms}

EncryptionKeyKMS represents a key that is generated and then sealed/unsealed by the KMS server.

{{< highlight yaml >}}
encryption:
    keys:
        - kms:
              endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`endpoint` |string |KMS endpoint to Seal/Unseal the key. | |

#### tpm {#SwapVolumeConfig.encryption.keys..tpm}

EncryptionKeyTPM represents a key that is generated and then sealed/unsealed by the TPM.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`checkSecurebootStatusOnEnroll` |bool |Check that Secureboot is enabled in the EFI firmware.<br>If Secureboot is not enabled, the enrollment of the key will fail. As the TPM key is anyways bound to the value of PCR 7, changing Secureboot status or configuration after the initial enrollment will make the key unusable. | |
@ -0,0 +1,284 @@
---
description: |
    UserVolumeConfig is a user volume configuration document.
    User volume is automatically allocated as a partition on the specified disk
    and mounted under `/var/mnt/<name>`.
    The partition label is automatically generated as `u-<name>`.
title: UserVolumeConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: UserVolumeConfig
name: ceph-data # Name of the volume.
# The provisioning describes how the volume is provisioned.
provisioning:
    # The disk selector expression.
    diskSelector:
        match: disk.transport == "nvme" # The Common Expression Language (CEL) expression to match the disk.
    maxSize: 50GiB # The maximum size of the volume, if not specified the volume can grow to the size of the disk.

    # # The minimum size of the volume.
    # minSize: 2.5GiB
# The filesystem describes how the volume is formatted.
filesystem:
    type: xfs # Filesystem type. Default is `xfs`.
# The encryption describes how the volume is encrypted.
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Enable TPM based disk encryption.
          tpm: {}

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
        - slot: 1 # Key slot number for LUKS2 encryption.
          # Key which value is stored in the configuration file.
          static:
              passphrase: topsecret # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.

    # # Cipher to use for the encryption. Depends on the encryption provider.
    # cipher: aes-xts-plain64

    # # Defines the encryption sector size.
    # blockSize: 4096

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}
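
As a usage sketch (hypothetical node address; the volume name follows the document above), a user volume can be added with a configuration patch and then observed; it is mounted under `/var/mnt/ceph-data`:

```sh
# Hypothetical: apply the UserVolumeConfig document shown above (saved as
# uservolume.yaml), then check the volume status; the partition label is
# generated as u-ceph-data.
talosctl -n 172.20.0.2 patch machineconfig --patch @uservolume.yaml
talosctl -n 172.20.0.2 get volumestatus u-ceph-data
```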

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the volume.<br><br>Name might be between 1 and 34 characters long and can only contain:<br>lowercase and uppercase ASCII letters, digits, and hyphens. | |
|`provisioning` |<a href="#UserVolumeConfig.provisioning">ProvisioningSpec</a> |The provisioning describes how the volume is provisioned. | |
|`filesystem` |<a href="#UserVolumeConfig.filesystem">FilesystemSpec</a> |The filesystem describes how the volume is formatted. | |
|`encryption` |<a href="#UserVolumeConfig.encryption">EncryptionSpec</a> |The encryption describes how the volume is encrypted. | |

## provisioning {#UserVolumeConfig.provisioning}

ProvisioningSpec describes how the volume is provisioned.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`diskSelector` |<a href="#UserVolumeConfig.provisioning.diskSelector">DiskSelector</a> |The disk selector expression. | |
|`grow` |bool |Should the volume grow to the size of the disk (if possible). | |
|`minSize` |ByteSize |The minimum size of the volume.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
minSize: 2.5GiB
{{< /highlight >}}</details> | |
|`maxSize` |ByteSize |The maximum size of the volume, if not specified the volume can grow to the size of the<br>disk.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
maxSize: 50GiB
{{< /highlight >}}</details> | |

### diskSelector {#UserVolumeConfig.provisioning.diskSelector}

DiskSelector selects a disk for the volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`match` |Expression |The Common Expression Language (CEL) expression to match the disk. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
match: disk.size > 120u * GB && disk.size < 1u * TB
{{< /highlight >}}{{< highlight yaml >}}
match: disk.transport == "sata" && !disk.rotational && !system_disk
{{< /highlight >}}</details> | |

## filesystem {#UserVolumeConfig.filesystem}

FilesystemSpec configures the filesystem for the volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`type` |FilesystemType |Filesystem type. Default is `xfs`. |`ext4`<br />`xfs`<br /> |
|`projectQuotaSupport` |bool |Enables project quota support, valid only for 'xfs' filesystem.<br><br>Note: changing this value might require a full remount of the filesystem. | |
## encryption {#UserVolumeConfig.encryption}

EncryptionSpec represents volume encryption settings.

{{< highlight yaml >}}
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key whose value is stored in the configuration file.
          static:
              passphrase: exampleKey # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
        - slot: 1 # Key slot number for LUKS2 encryption.
          # KMS managed encryption key.
          kms:
              endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
    cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
    blockSize: 4096 # Defines the encryption sector size.

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`provider` |EncryptionProviderType |Encryption provider to use for the encryption. |`luks2`<br /> |
|`keys` |<a href="#UserVolumeConfig.encryption.keys.">[]EncryptionKey</a> |Defines the encryption keys generation and storage method. | |
|`cipher` |string |Cipher to use for the encryption. Depends on the encryption provider. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
cipher: aes-xts-plain64
{{< /highlight >}}</details> |`aes-xts-plain64`<br />`xchacha12,aes-adiantum-plain64`<br />`xchacha20,aes-adiantum-plain64`<br /> |
|`keySize` |uint |Defines the encryption key length. | |
|`blockSize` |uint64 |Defines the encryption sector size. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
blockSize: 4096
{{< /highlight >}}</details> | |
|`options` |[]string |Additional --perf parameters for the LUKS2 encryption. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
options:
    - no_read_workqueue
    - no_write_workqueue
{{< /highlight >}}</details> |`no_read_workqueue`<br />`no_write_workqueue`<br />`same_cpu_crypt`<br /> |

### keys[] {#UserVolumeConfig.encryption.keys.}

EncryptionKey represents the configuration for a disk encryption key.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`slot` |int |Key slot number for LUKS2 encryption. | |
|`static` |<a href="#UserVolumeConfig.encryption.keys..static">EncryptionKeyStatic</a> |Key whose value is stored in the configuration file. | |
|`nodeID` |<a href="#UserVolumeConfig.encryption.keys..nodeID">EncryptionKeyNodeID</a> |Key deterministically generated from the node UUID and PartitionLabel. | |
|`kms` |<a href="#UserVolumeConfig.encryption.keys..kms">EncryptionKeyKMS</a> |KMS managed encryption key. | |
|`tpm` |<a href="#UserVolumeConfig.encryption.keys..tpm">EncryptionKeyTPM</a> |Enable TPM based disk encryption. | |
|`lockToState` |bool |Lock the disk encryption key to the random salt stored in the STATE partition. This is useful to prevent the volume from being unlocked if the STATE partition is compromised or replaced. It is recommended to use this option with TPM disk encryption for non-STATE volumes. | |

#### static {#UserVolumeConfig.encryption.keys..static}

EncryptionKeyStatic represents a throw-away key type.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`passphrase` |string |Defines the static passphrase value. | |

#### nodeID {#UserVolumeConfig.encryption.keys..nodeID}

EncryptionKeyNodeID represents a key deterministically generated from the node UUID and PartitionLabel.
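
Since `EncryptionKeyNodeID` carries no fields of its own, enabling it is a matter of selecting the key type for a slot. A minimal sketch, assuming an empty mapping selects this key type (the slot number is arbitrary):

{{< highlight yaml >}}
encryption:
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key deterministically generated from the node UUID and PartitionLabel.
          nodeID: {}
{{< /highlight >}}
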
#### kms {#UserVolumeConfig.encryption.keys..kms}

EncryptionKeyKMS represents a key that is generated and then sealed/unsealed by the KMS server.

{{< highlight yaml >}}
encryption:
    keys:
        - kms:
              endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`endpoint` |string |KMS endpoint to Seal/Unseal the key. | |

#### tpm {#UserVolumeConfig.encryption.keys..tpm}

EncryptionKeyTPM represents a key that is generated and then sealed/unsealed by the TPM.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`checkSecurebootStatusOnEnroll` |bool |Check that SecureBoot is enabled in the EFI firmware.<br>If SecureBoot is not enabled, the enrollment of the key will fail. As the TPM key is in any case bound to the value of PCR 7, changing the SecureBoot status or configuration after the initial enrollment will make the key unusable. | |
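
A sketch combining the TPM key type with `lockToState` (recommended above for non-STATE volumes); the slot number is arbitrary:

{{< highlight yaml >}}
encryption:
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Enable TPM based disk encryption.
          tpm:
              checkSecurebootStatusOnEnroll: true
          lockToState: true # Lock the key to the salt in the STATE partition.
{{< /highlight >}}
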
@ -0,0 +1,254 @@
---
description: |
    VolumeConfig is a system volume configuration document.
    Note: at the moment, only `STATE`, `EPHEMERAL` and `IMAGE-CACHE` system volumes are supported.
title: VolumeConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: VolumeConfig
name: EPHEMERAL # Name of the volume.
# The provisioning describes how the volume is provisioned.
provisioning:
    # The disk selector expression.
    diskSelector:
        match: disk.transport == "nvme" # The Common Expression Language (CEL) expression to match the disk.
    maxSize: 50GiB # The maximum size of the volume; if not specified, the volume can grow to the size of the disk.

    # # The minimum size of the volume.
    # minSize: 2.5GiB

# # The encryption describes how the volume is encrypted.
# encryption:
#     provider: luks2 # Encryption provider to use for the encryption.
#     # Defines the encryption keys generation and storage method.
#     keys:
#         - slot: 0 # Key slot number for LUKS2 encryption.
#           # Key whose value is stored in the configuration file.
#           static:
#               passphrase: exampleKey # Defines the static passphrase value.
#
#           # # KMS managed encryption key.
#           # kms:
#           #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
#         - slot: 1 # Key slot number for LUKS2 encryption.
#           # KMS managed encryption key.
#           kms:
#               endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
#     cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
#     blockSize: 4096 # Defines the encryption sector size.
#     # Additional --perf parameters for the LUKS2 encryption.
#     options:
#         - no_read_workqueue
#         - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the volume. | |
|`provisioning` |<a href="#VolumeConfig.provisioning">ProvisioningSpec</a> |The provisioning describes how the volume is provisioned. | |
|`encryption` |<a href="#VolumeConfig.encryption">EncryptionSpec</a> |The encryption describes how the volume is encrypted. | |

## provisioning {#VolumeConfig.provisioning}

ProvisioningSpec describes how the volume is provisioned.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`diskSelector` |<a href="#VolumeConfig.provisioning.diskSelector">DiskSelector</a> |The disk selector expression. | |
|`grow` |bool |Should the volume grow to the size of the disk (if possible). | |
|`minSize` |ByteSize |The minimum size of the volume.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
minSize: 2.5GiB
{{< /highlight >}}</details> | |
|`maxSize` |ByteSize |The maximum size of the volume; if not specified, the volume can grow to the size of the<br>disk.<br><br>Size is specified in bytes, but can be expressed in human readable format, e.g. 100MB. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
maxSize: 50GiB
{{< /highlight >}}</details> | |
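
The `grow` field has no inline example above; a sketch of a provisioning block that grows the volume to fill the disk instead of capping it (the disk selector expression is an assumption):

{{< highlight yaml >}}
provisioning:
    diskSelector:
        match: system_disk # Assumed: place the volume on the system disk.
    grow: true # Grow the volume to the size of the disk (if possible).
{{< /highlight >}}
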
### diskSelector {#VolumeConfig.provisioning.diskSelector}

DiskSelector selects a disk for the volume.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`match` |Expression |The Common Expression Language (CEL) expression to match the disk. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
match: disk.size > 120u * GB && disk.size < 1u * TB
{{< /highlight >}}{{< highlight yaml >}}
match: disk.transport == "sata" && !disk.rotational && !system_disk
{{< /highlight >}}</details> | |

## encryption {#VolumeConfig.encryption}

EncryptionSpec represents volume encryption settings.

{{< highlight yaml >}}
encryption:
    provider: luks2 # Encryption provider to use for the encryption.
    # Defines the encryption keys generation and storage method.
    keys:
        - slot: 0 # Key slot number for LUKS2 encryption.
          # Key whose value is stored in the configuration file.
          static:
              passphrase: exampleKey # Defines the static passphrase value.

          # # KMS managed encryption key.
          # kms:
          #     endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
        - slot: 1 # Key slot number for LUKS2 encryption.
          # KMS managed encryption key.
          kms:
              endpoint: https://example-kms-endpoint.com # KMS endpoint to Seal/Unseal the key.
    cipher: aes-xts-plain64 # Cipher to use for the encryption. Depends on the encryption provider.
    blockSize: 4096 # Defines the encryption sector size.

    # # Additional --perf parameters for the LUKS2 encryption.
    # options:
    #     - no_read_workqueue
    #     - no_write_workqueue
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`provider` |EncryptionProviderType |Encryption provider to use for the encryption. |`luks2`<br /> |
|`keys` |<a href="#VolumeConfig.encryption.keys.">[]EncryptionKey</a> |Defines the encryption keys generation and storage method. | |
|`cipher` |string |Cipher to use for the encryption. Depends on the encryption provider. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
cipher: aes-xts-plain64
{{< /highlight >}}</details> |`aes-xts-plain64`<br />`xchacha12,aes-adiantum-plain64`<br />`xchacha20,aes-adiantum-plain64`<br /> |
|`keySize` |uint |Defines the encryption key length. | |
|`blockSize` |uint64 |Defines the encryption sector size. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
blockSize: 4096
{{< /highlight >}}</details> | |
|`options` |[]string |Additional --perf parameters for the LUKS2 encryption. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
options:
    - no_read_workqueue
    - no_write_workqueue
{{< /highlight >}}</details> |`no_read_workqueue`<br />`no_write_workqueue`<br />`same_cpu_crypt`<br /> |

### keys[] {#VolumeConfig.encryption.keys.}

EncryptionKey represents the configuration for a disk encryption key.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`slot` |int |Key slot number for LUKS2 encryption. | |
|`static` |<a href="#VolumeConfig.encryption.keys..static">EncryptionKeyStatic</a> |Key whose value is stored in the configuration file. | |
|`nodeID` |<a href="#VolumeConfig.encryption.keys..nodeID">EncryptionKeyNodeID</a> |Key deterministically generated from the node UUID and PartitionLabel. | |
|`kms` |<a href="#VolumeConfig.encryption.keys..kms">EncryptionKeyKMS</a> |KMS managed encryption key. | |
|`tpm` |<a href="#VolumeConfig.encryption.keys..tpm">EncryptionKeyTPM</a> |Enable TPM based disk encryption. | |
|`lockToState` |bool |Lock the disk encryption key to the random salt stored in the STATE partition. This is useful to prevent the volume from being unlocked if the STATE partition is compromised or replaced. It is recommended to use this option with TPM disk encryption for non-STATE volumes. | |

#### static {#VolumeConfig.encryption.keys..static}

EncryptionKeyStatic represents a throw-away key type.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`passphrase` |string |Defines the static passphrase value. | |

#### nodeID {#VolumeConfig.encryption.keys..nodeID}

EncryptionKeyNodeID represents a key deterministically generated from the node UUID and PartitionLabel.

#### kms {#VolumeConfig.encryption.keys..kms}

EncryptionKeyKMS represents a key that is generated and then sealed/unsealed by the KMS server.

{{< highlight yaml >}}
encryption:
    keys:
        - kms:
              endpoint: https://192.168.88.21:4443 # KMS endpoint to Seal/Unseal the key.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`endpoint` |string |KMS endpoint to Seal/Unseal the key. | |

#### tpm {#VolumeConfig.encryption.keys..tpm}

EncryptionKeyTPM represents a key that is generated and then sealed/unsealed by the TPM.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`checkSecurebootStatusOnEnroll` |bool |Check that SecureBoot is enabled in the EFI firmware.<br>If SecureBoot is not enabled, the enrollment of the key will fail. As the TPM key is in any case bound to the value of PCR 7, changing the SecureBoot status or configuration after the initial enrollment will make the key unusable. | |

@ -0,0 +1,37 @@
---
description: |
    ZswapConfig is a zswap (compressed memory) configuration document.
    When zswap is enabled, the Linux kernel compresses pages that would otherwise be swapped out to disk.
    The compressed pages are stored in a memory pool, which is used to avoid writing to disk
    when the system is under memory pressure.
title: ZswapConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: ZswapConfig
maxPoolPercent: 25 # The maximum percent of memory that zswap can use.
shrinkerEnabled: true # Enable the shrinker feature: kernel might move cold pages from zswap to swap device.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`maxPoolPercent` |int |The maximum percent of memory that zswap can use.<br>This is a percentage of the total system memory.<br>The value must be between 0 and 100. | |
|`shrinkerEnabled` |bool |Enable the shrinker feature: the kernel might move<br>cold pages from zswap to the swap device to free up memory<br>for other use cases. | |

@ -0,0 +1,8 @@
---
description: |
    Package extensions provides extensions config documents.
title: extensions
---

<!-- markdownlint-disable -->

@ -0,0 +1,57 @@
---
description: ExtensionServiceConfig is an extension service config document.
title: ExtensionServiceConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: ExtensionServiceConfig
name: nut-client # Name of the extension service.
# The config files for the extension service.
configFiles:
    - content: MONITOR ${upsmonHost} 1 remote username password # The content of the extension service config file.
      mountPath: /usr/local/etc/nut/upsmon.conf # The mount path of the extension service config file.
# The environment for the extension service.
environment:
    - NUT_UPS=upsname
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the extension service. | |
|`configFiles` |<a href="#ExtensionServiceConfig.configFiles.">[]ConfigFile</a> |The config files for the extension service. | |
|`environment` |[]string |The environment for the extension service. | |

## configFiles[] {#ExtensionServiceConfig.configFiles.}

ConfigFile is a config file for extension services.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`content` |string |The content of the extension service config file. | |
|`mountPath` |string |The mount path of the extension service config file. | |

@ -0,0 +1,8 @@
---
description: |
    Package hardware provides hardware related config documents.
title: hardware
---

<!-- markdownlint-disable -->

@ -0,0 +1,33 @@
---
description: PCIDriverRebindConfig allows configuring PCI driver rebinds.
title: PCIDriverRebindConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: PCIDriverRebindConfig
name: 0000:04:00.00 # PCI device ID.
targetDriver: vfio-pci # Target driver to rebind the PCI device to.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |PCI device ID. | |
|`targetDriver` |string |Target driver to rebind the PCI device to. | |

@ -0,0 +1,8 @@
---
description: |
    Package network provides network machine configuration documents.
title: network
---

<!-- markdownlint-disable -->

@ -0,0 +1,87 @@
---
description: EthernetConfig is a config document to configure Ethernet interfaces.
title: EthernetConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: EthernetConfig
name: enp0s2 # Name of the link (interface).
# Configuration for Ethernet features.
features:
    tx-tcp-segmentation: false
# Configuration for Ethernet link rings.
rings:
    rx: 256 # Number of RX rings.
# Configuration for Ethernet link channels.
channels:
    rx: 4 # Number of RX channels.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the link (interface). | |
|`features` |map[string]bool |Configuration for Ethernet features.<br><br>The set of available features and whether they can be enabled or disabled is driver specific.<br>Use `talosctl get ethernetstatus <link> -o yaml` to get the list of available features and<br>their current status. | |
|`rings` |<a href="#EthernetConfig.rings">EthernetRingsConfig</a> |Configuration for Ethernet link rings.<br><br>This is similar to the `ethtool -G` command. | |
|`channels` |<a href="#EthernetConfig.channels">EthernetChannelsConfig</a> |Configuration for Ethernet link channels.<br><br>This is similar to the `ethtool -L` command. | |

## rings {#EthernetConfig.rings}

EthernetRingsConfig is a configuration for Ethernet link rings.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`rx` |uint32 |Number of RX rings. | |
|`tx` |uint32 |Number of TX rings. | |
|`rx-mini` |uint32 |Number of RX mini rings. | |
|`rx-jumbo` |uint32 |Number of RX jumbo rings. | |
|`rx-buf-len` |uint32 |RX buffer length. | |
|`cqe-size` |uint32 |CQE size. | |
|`tx-push` |bool |TX push enabled. | |
|`rx-push` |bool |RX push enabled. | |
|`tx-push-buf-len` |uint32 |TX push buffer length. | |
|`tcp-data-split` |bool |TCP data split enabled. | |
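
As with `ethtool -G`, only the rings a driver supports can be set. A sketch reusing the interface name from the example above; the ring sizes are assumptions and must not exceed the maximums the driver reports:

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: EthernetConfig
name: enp0s2 # Name of the link (interface).
rings:
    rx: 4096 # Number of RX rings (assumed value).
    tx: 4096 # Number of TX rings (assumed value).
{{< /highlight >}}
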
## channels {#EthernetConfig.channels}

EthernetChannelsConfig is a configuration for Ethernet link channels.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`rx` |uint32 |Number of RX channels. | |
|`tx` |uint32 |Number of TX channels. | |
|`other` |uint32 |Number of other channels. | |
|`combined` |uint32 |Number of combined channels. | |
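
Mirroring `ethtool -L`, a sketch that switches a NIC to combined channels; the channel count is an assumption and is bounded by what the driver reports:

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: EthernetConfig
name: enp0s2 # Name of the link (interface).
channels:
    combined: 8 # Number of combined channels (assumed value).
{{< /highlight >}}
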
@ -0,0 +1,33 @@
---
description: KubeSpanEndpointsConfig is a config document to configure KubeSpan endpoints.
title: KubeSpanEndpointsConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: KubeSpanEndpointsConfig
# A list of extra WireGuard endpoints to announce from this machine.
extraAnnouncedEndpoints:
    - 192.168.13.46:52000
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`extraAnnouncedEndpoints` |[]AddrPort |A list of extra WireGuard endpoints to announce from this machine.<br><br>Talos automatically adds endpoints based on machine addresses, public IP, etc.<br>This field allows adding extra endpoints which are managed outside of Talos, e.g. NAT mapping. | |

@ -0,0 +1,31 @@
---
description: NetworkDefaultActionConfig is an ingress firewall default action configuration document.
title: NetworkDefaultActionConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: NetworkDefaultActionConfig
ingress: accept # Default action for all not explicitly configured ingress traffic: accept or block.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`ingress` |DefaultAction |Default action for all not explicitly configured ingress traffic: accept or block. |`accept`<br />`block`<br /> |
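
A `block` default is typically paired with explicit `NetworkRuleConfig` documents (below) that open the required ports; a sketch of the default-deny side:

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: NetworkDefaultActionConfig
ingress: block # Block all ingress traffic not explicitly allowed by network rules.
{{< /highlight >}}
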
@ -0,0 +1,90 @@
---
description: NetworkRuleConfig is a network firewall rule config document.
title: NetworkRuleConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: NetworkRuleConfig
name: ingress-apid # Name of the config document.
# Port selector defines which ports and protocols on the host are affected by the rule.
portSelector:
    # Ports defines a list of port ranges or single ports.
    ports:
        - 50000
    protocol: tcp # Protocol defines traffic protocol (e.g. TCP or UDP).
# Ingress defines which source subnets are allowed to access the host ports/protocols defined by the `portSelector`.
ingress:
    - subnet: 192.168.0.0/16 # Subnet defines a source subnet.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the config document. | |
|`portSelector` |<a href="#NetworkRuleConfig.portSelector">RulePortSelector</a> |Port selector defines which ports and protocols on the host are affected by the rule. | |
|`ingress` |<a href="#NetworkRuleConfig.ingress.">[]IngressRule</a> |Ingress defines which source subnets are allowed to access the host ports/protocols defined by the `portSelector`. | |

## portSelector {#NetworkRuleConfig.portSelector}

RulePortSelector is a port selector for the network rule.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`ports` |PortRanges |Ports defines a list of port ranges or single ports.<br>The port ranges are inclusive, and should not overlap. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
ports:
    - 80
    - 443
{{< /highlight >}}{{< highlight yaml >}}
ports:
    - 1200-1299
    - 8080
{{< /highlight >}}</details> | |
|`protocol` |Protocol |Protocol defines traffic protocol (e.g. TCP or UDP). |`tcp`<br />`udp`<br />`icmp`<br />`icmpv6`<br /> |

## ingress[] {#NetworkRuleConfig.ingress.}

IngressRule is an ingress rule.

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`subnet` |Prefix |Subnet defines a source subnet. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
subnet: 10.3.4.0/24
{{< /highlight >}}{{< highlight yaml >}}
subnet: 2001:db8::/32
{{< /highlight >}}{{< highlight yaml >}}
subnet: 1.3.4.5/32
{{< /highlight >}}</details> | |
|`except` |Prefix |Except defines a source subnet to exclude from the rule; it gets excluded from the `subnet`. | |
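
A sketch showing `except` carving a hole out of `subnet` (the addresses are assumptions): traffic from 10.0.0.0/8 matches the rule, except for 10.3.0.0/16.

{{< highlight yaml >}}
ingress:
    - subnet: 10.0.0.0/8 # Subnet defines a source subnet.
      except: 10.3.0.0/16 # Excluded from the `subnet` above.
{{< /highlight >}}
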
@ -0,0 +1,8 @@
---
description: |
    Package runtime provides runtime machine configuration documents.
title: runtime
---

<!-- markdownlint-disable -->

@ -0,0 +1,33 @@
---
description: EventSinkConfig is an event sink config document.
title: EventSinkConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: EventSinkConfig
endpoint: 192.168.10.3:3247 # The endpoint for the event sink as 'host:port'.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`endpoint` |string |The endpoint for the event sink as 'host:port'. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
endpoint: 10.3.7.3:2810
{{< /highlight >}}</details> | |

@ -0,0 +1,35 @@
---
description: KmsgLogConfig is a kmsg log config document.
title: KmsgLogConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: KmsgLogConfig
name: remote-log # Name of the config document.
url: tcp://192.168.3.7:3478/ # The URL encodes the log destination.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the config document. | |
|`url` |URL |The URL encodes the log destination.<br>The scheme must be tcp:// or udp://.<br>The path must be empty.<br>The port is required. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
url: udp://10.3.7.3:2810
{{< /highlight >}}</details> | |

@ -0,0 +1,35 @@
---
description: WatchdogTimerConfig is a watchdog timer config document.
title: WatchdogTimerConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: WatchdogTimerConfig
device: /dev/watchdog0 # Path to the watchdog device.
timeout: 2m0s # Timeout for the watchdog.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`device` |string |Path to the watchdog device. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
device: /dev/watchdog0
{{< /highlight >}}</details> | |
|`timeout` |Duration |Timeout for the watchdog.<br><br>If Talos is unresponsive for this duration, the watchdog will reset the system.<br><br>Default value is 1 minute, minimum value is 10 seconds. | |

@ -0,0 +1,8 @@
---
description: |
    Package security provides security-related machine configuration documents.
title: security
---

<!-- markdownlint-disable -->

@ -0,0 +1,36 @@
---
description: TrustedRootsConfig allows configuring additional trusted CA roots.
title: TrustedRootsConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: TrustedRootsConfig
name: my-enterprise-ca # Name of the config document.
certificates: | # List of additional trusted certificate authorities (as PEM-encoded certificates).
    -----BEGIN CERTIFICATE-----
    ...
    -----END CERTIFICATE-----
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`name` |string |Name of the config document. | |
|`certificates` |string |List of additional trusted certificate authorities (as PEM-encoded certificates).<br><br>Multiple certificates can be provided in a single config document, separated by newline characters. | |

@ -0,0 +1,8 @@
---
description: |
    Package siderolink provides SideroLink machine configuration documents.
title: siderolink
---

<!-- markdownlint-disable -->

@ -0,0 +1,34 @@
---
description: SideroLinkConfig is a SideroLink connection machine configuration document.
title: SideroLinkConfig
---

<!-- markdownlint-disable -->

{{< highlight yaml >}}
apiVersion: v1alpha1
kind: SideroLinkConfig
apiUrl: https://siderolink.api/jointoken?token=secret # SideroLink API URL to connect to.
{{< /highlight >}}

| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`apiUrl` |URL |SideroLink API URL to connect to. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
apiUrl: https://siderolink.api/?jointoken=secret
{{< /highlight >}}</details> | |
|`uniqueToken` |string |SideroLink unique token to use for the connection (optional).<br><br>This value is overridden with the META key UniqueMachineToken. | |

@ -0,0 +1,14 @@
---
description: |
    Package v1alpha1 contains the definition of the `v1alpha1` configuration document.

    Even though the machine configuration in Talos Linux is multi-document, at the moment
    this configuration document contains most of the configuration options.

    It is expected that new configuration options will be added as new documents, and existing ones
    migrated to their own documents.
title: v1alpha1
---

<!-- markdownlint-disable -->

website/content/v1.12/reference/configuration/v1alpha1/config.md (new file, 3609 lines)
File diff suppressed because it is too large.
Some files were not shown because too many files have changed in this diff.