diff --git a/Dockerfile b/Dockerfile index e46a788ef..d2804718e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -557,5 +557,5 @@ RUN mkdir -p /docs/talosctl \ && env HOME=/home/user TAG=latest /bin/talosctl docs /docs/talosctl FROM scratch AS docs -COPY --from=docs-build /tmp/v1alpha1.md /docs/website/content/v0.6/en/configuration/v1alpha1.md +COPY --from=docs-build /tmp/v1alpha1.md /docs/website/content/v0.7/en/configuration/v1alpha1.md COPY --from=docs-build /docs/talosctl/* /docs/talosctl/ diff --git a/docs/website/components/DocumentationDropdown.vue b/docs/website/components/DocumentationDropdown.vue index 72e945214..1fa5d5f77 100644 --- a/docs/website/components/DocumentationDropdown.vue +++ b/docs/website/components/DocumentationDropdown.vue @@ -35,6 +35,7 @@ export default { data() { return { options: [ + { version: 'v0.7', url: '/docs/v0.7', prerelease: true }, { version: 'v0.6', url: '/docs/v0.6', prerelease: false }, { version: 'v0.5', url: '/docs/v0.5', prerelease: false }, { version: 'v0.4', url: '/docs/v0.4', prerelease: false } diff --git a/docs/website/content/v0.7.en.json b/docs/website/content/v0.7.en.json new file mode 100644 index 000000000..2f1b38fff --- /dev/null +++ b/docs/website/content/v0.7.en.json @@ -0,0 +1,191 @@ +[ + { + "title": "Guides", + "path": "v0.7/en/guides/getting-started/intro", + "items": [ + { + "title": "Getting Started", + "path": "v0.7/en/guides/getting-started", + "children": [ + { + "title": "Introduction to Talos", + "path": "v0.7/en/guides/getting-started/intro" + }, + { + "title": "Talosctl", + "path": "v0.7/en/guides/getting-started/talosctl" + }, + { + "title": "How to Get Help", + "path": "v0.7/en/guides/getting-started/help" + } + ] + }, + { + "title": "Local Clusters", + "children": [ + { + "title": "Docker", + "path": "v0.7/en/guides/local/docker" + }, + { + "title": "QEMU", + "path": "v0.7/en/guides/local/qemu" + }, + { + "title": "Firecracker", + "path": "v0.7/en/guides/local/firecracker" + }, + { + "title": "Registry Cache", + "path": "v0.7/en/guides/local/registry-cache" + } + ] + }, + { + "title": "Cloud", + "path": "v0.7/en/guides/cloud", + "children": [ + { + "title": "AWS", + "path": "v0.7/en/guides/cloud/aws" + }, + { + "title": "Azure", + "path": "v0.7/en/guides/cloud/azure" + }, + { + "title": "Digital Ocean", + "path": "v0.7/en/guides/cloud/digitalocean" + }, + { + "title": "GCP", + "path": "v0.7/en/guides/cloud/gcp" + }, + { + "title": "VMware", + "path": "v0.7/en/guides/cloud/vmware" + } + ] + }, + { + "title": "Metal", + "path": "v0.7/en/guides/metal", + "children": [ + { + "title": "Overview", + "path": "v0.7/en/guides/metal/overview" + }, + { + "title": "Arges", + "path": "v0.7/en/guides/metal/arges" + }, + { + "title": "Digital Rebar", + "path": "v0.7/en/guides/metal/digitalrebar" + }, + { + "title": "Matchbox", + "path": "v0.7/en/guides/metal/matchbox" + } + ] + }, + { + "title": "Upgrading", + "path": "v0.7/en/guides/upgrading" + } + ] + }, + { + "title": "Configuration", + "path": "v0.7/en/configuration", + "items": [ + { + "title": "Overview", + "path": "v0.7/en/configuration/overview" + }, + { + "title": "v1alpha1", + "path": "v0.7/en/configuration/v1alpha1" + } + ] + }, + { + "title": "Troubleshooting", + "path": "v0.7/en/troubleshooting", + "items": [ + { + "title": "Overview", + "path": "v0.7/en/troubleshooting/overview" + }, + { + "title": "PKI", + "path": "v0.7/en/troubleshooting/pki" + }, + { + "title": "Machine Reset", + "path": "v0.7/en/troubleshooting/machine-reset" + } + ] + }, + { + "title": 
"Customization", + "path": "v0.7/en/customization", + "items": [ + { + "title": "Overview", + "path": "v0.7/en/customization/overview" + }, + { + "title": "Containerd", + "path": "v0.7/en/customization/containerd" + }, + { + "title": "Corporate Proxy", + "path": "v0.7/en/customization/proxy" + }, + { + "title": "Kernel", + "path": "v0.7/en/customization/kernel" + } + ] + }, + { + "title": "Components", + "path": "v0.7/en/components", + "items": [ + { + "title": "Overview", + "path": "v0.7/en/components/overview" + }, + { + "title": "apid", + "path": "v0.7/en/components/apid" + }, + { + "title": "kernel", + "path": "v0.7/en/components/kernel" + }, + { + "title": "machined", + "path": "v0.7/en/components/machined" + }, + { + "title": "networkd", + "path": "v0.7/en/components/networkd" + }, + { + "title": "timed", + "path": "v0.7/en/components/timed" + }, + { + "title": "trustd", + "path": "v0.7/en/components/trustd" + }, + { + "title": "udevd", + "path": "v0.7/en/components/udevd" + } + ] + } +] diff --git a/docs/website/content/v0.7/en/components/apid.md b/docs/website/content/v0.7/en/components/apid.md new file mode 100644 index 000000000..e9b990e7e --- /dev/null +++ b/docs/website/content/v0.7/en/components/apid.md @@ -0,0 +1,50 @@ +--- +title: 'apid' +--- + +When interacting with Talos, the gRPC api endpoint you will interact with directly is `apid`. +Apid acts as the gateway for all component interactions. +Apid provides a mechanism to route requests to the appropriate destination when running on a control plane node. + +We'll use some examples below to illustrate what `apid` is doing. + +When a user wants to interact with a Talos component via `talosctl`, there are two flags that control the interaction with `apid`. +The `-e | --endpoints` flag is used to denote which Talos node ( via `apid` ) should handle the connection. +Typically this is a public facing server. +The `-n | --nodes` flag is used to denote which Talos node(s) should respond to the request. +If `--nodes` is not specified, the first endpoint will be used. + +> Note: Typically there will be an `endpoint` already defined in the Talos config file. +> Optionally, `nodes` can be included here as well. + +For example, if a user wants to interact with `machined`, a command like `talosctl -e cluster.talos.dev memory` may be used. + +```bash +$ talosctl -e cluster.talos.dev memory +NODE TOTAL USED FREE SHARED BUFFERS CACHE AVAILABLE +cluster.talos.dev 7938 1768 2390 145 53 3724 6571 +``` + +In this case, `talosctl` is interacting with `apid` running on `cluster.talos.dev` and forwarding the request to the `machined` api. + +If we wanted to extend our example to retrieve `memory` from another node in our cluster, we could use the command `talosctl -e cluster.talos.dev -n node02 memory`. + +```bash +$ talosctl -e cluster.talos.dev -n node02 memory +NODE TOTAL USED FREE SHARED BUFFERS CACHE AVAILABLE +node02 7938 1768 2390 145 53 3724 6571 +``` + +The `apid` instance on `cluster.talos.dev` receives the request and forwards it to `apid` running on `node02` which forwards the request to the `machined` api. 
+
+We can further extend our example to retrieve `memory` for all nodes in our cluster by appending additional `-n node` flags or using a comma-separated list of nodes ( `-n node01,node02,node03` ):
+
+```bash
+$ talosctl -e cluster.talos.dev -n node01 -n node02 -n node03 memory
+NODE     TOTAL    USED    FREE     SHARED   BUFFERS   CACHE   AVAILABLE
+node01   7938     871     4071     137      49        2945    7042
+node02   257844   14408   190796   18138    49        52589   227492
+node03   257844   1830    255186   125      49        777     254556
+```
+
+The `apid` instance on `cluster.talos.dev` receives the request and forwards it to `node01`, `node02`, and `node03`, which then forward the request to their local `machined` API.
diff --git a/docs/website/content/v0.7/en/components/containerd.md b/docs/website/content/v0.7/en/components/containerd.md
new file mode 100644
index 000000000..90ccdfd58
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/containerd.md
@@ -0,0 +1,7 @@
+---
+title: containerd
+---
+
+[Containerd](https://github.com/containerd/containerd) provides the container runtime used to launch workloads on Talos as well as Kubernetes.
+
+Talos services are namespaced under the `system` namespace in containerd, whereas the Kubernetes services are namespaced under the `k8s.io` namespace.
diff --git a/docs/website/content/v0.7/en/components/kernel.md b/docs/website/content/v0.7/en/components/kernel.md
new file mode 100644
index 000000000..713e0314e
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/kernel.md
@@ -0,0 +1,5 @@
+---
+title: 'kernel'
+---
+
+The Linux kernel included with Talos is configured according to the recommendations outlined in the Kernel Self Protection Project ([KSPP](http://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project)).
diff --git a/docs/website/content/v0.7/en/components/machined.md b/docs/website/content/v0.7/en/components/machined.md
new file mode 100644
index 000000000..8b1b08ede
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/machined.md
@@ -0,0 +1,20 @@
+---
+title: 'machined'
+---
+
+A common theme throughout the design of Talos is minimalism.
+We believe strongly in the UNIX philosophy that each program should do one job well.
+The `init` included in Talos is one example of this, and we are calling it "`machined`".
+
+We wanted to create a focused `init` that had one job - run Kubernetes.
+To that end, `machined` is relatively static in that it does not allow for arbitrary user-defined services.
+Only the services necessary to run Kubernetes and manage the node are available.
+This includes:
+
+- [containerd](containerd)
+- [kubeadm](kubeadm)
+- [kubelet](https://kubernetes.io/docs/concepts/overview/components/)
+- [networkd](networkd)
+- [timed](timed)
+- [trustd](trustd)
+- [udevd](udevd)
diff --git a/docs/website/content/v0.7/en/components/networkd.md b/docs/website/content/v0.7/en/components/networkd.md
new file mode 100644
index 000000000..097d5c5a2
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/networkd.md
@@ -0,0 +1,100 @@
+---
+title: networkd
+---
+
+Networkd handles all of the host-level network configuration.
+Configuration is defined under the `networking` key.
+
+By default, we attempt to issue a DHCP request for every interface on the server.
+This can be overridden by supplying one of the following kernel arguments:
+
+- `talos.network.interface.ignore` - specify a list of interfaces to skip discovery on
+- `ip` - `ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:<dns0-ip>:<dns1-ip>:<ntp0-ip>` as documented in the [kernel here](https://www.kernel.org/doc/Documentation/filesystems/nfs/nfsroot.txt)
+  - e.g. `ip=10.0.0.99:::255.0.0.0:control-1:eth0:off:10.0.0.1`
+
+## Examples
+
+Documentation for the network section components can be found under the configuration reference.
+
+### Static Addressing
+
+Static addressing consists of specifying `cidr`, `routes` ( remember to add your default gateway ), and `interface`.
+Most likely you'll also want to define the `nameservers` so you have properly functioning DNS.
+
+```yaml
+machine:
+  network:
+    hostname: talos
+    nameservers:
+    - 10.0.0.1
+    time:
+      servers:
+      - time.cloudflare.com
+    interfaces:
+    - interface: eth0
+      cidr: 10.0.0.201/8
+      mtu: 8765
+      routes:
+      - network: 0.0.0.0/0
+        gateway: 10.0.0.1
+    - interface: eth1
+      ignore: true
+```
+
+### Additional Addresses for an Interface
+
+In some environments you may need to set additional addresses on an interface.
+In the following example, we set two additional addresses on the loopback interface.
+
+```yaml
+machine:
+  network:
+    interfaces:
+    - interface: lo0
+      cidr: 192.168.0.21/24
+    - interface: lo0
+      cidr: 10.2.2.2/24
+```
+
+### Bonding
+
+The following example shows how to create a bonded interface.
+
+```yaml
+machine:
+  network:
+    interfaces:
+    - interface: bond0
+      dhcp: true
+      bond:
+        mode: 802.3ad
+        lacprate: fast
+        hashpolicy: layer3+4
+        miimon: 100
+        updelay: 200
+        downdelay: 200
+        interfaces:
+        - eth0
+        - eth1
+```
+
+### VLANs
+
+To set up VLANs on a specific device, use an array of VLANs to add.
+The master device may be configured without addressing by setting `dhcp` to `false`.
+
+```yaml
+machine:
+  network:
+    interfaces:
+    - interface: eth0
+      dhcp: false
+      vlans:
+      - vlanId: 100
+        cidr: "192.168.2.10/28"
+        routes:
+        - network: 0.0.0.0/0
+          gateway: 192.168.2.1
+```
diff --git a/docs/website/content/v0.7/en/components/osctl.md b/docs/website/content/v0.7/en/components/osctl.md
new file mode 100644
index 000000000..843f9947b
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/osctl.md
@@ -0,0 +1,15 @@
+---
+title: 'talosctl'
+---
+
+The `talosctl` CLI is the client to the [apid](/components/apid) service running on every node.
+`talosctl` should provide enough functionality to be a replacement for typical interactive shell operations.
+With it you can do things like:
+
+- `talosctl logs <service>` - retrieve container logs
+- `talosctl restart <service>` - restart a service
+- `talosctl reboot` - reboot a node
+- `talosctl dmesg` - retrieve kernel logs
+- `talosctl ps` - view running services
+- `talosctl top` - view node resources
+- `talosctl services` - view status of Talos services
diff --git a/docs/website/content/v0.7/en/components/overview.md b/docs/website/content/v0.7/en/components/overview.md
new file mode 100644
index 000000000..7b4c7c793
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/overview.md
@@ -0,0 +1,19 @@
+---
+title: 'Components'
+---
+
+In this section we will discuss the various components of which Talos is composed.
+
+## Overview
+
+| Component | Description |
+| ------------ | ----------- |
+| [apid](apid) | When interacting with Talos, the gRPC API endpoint you interact with directly is provided by `apid`. `apid` acts as the gateway for all component interactions and forwards the requests to `routerd`. |
+| [containerd](containerd) | An industry-standard container runtime with an emphasis on simplicity, robustness and portability. To learn more see the [containerd website](https://containerd.io). |
+| [machined](machined) | The Talos replacement for the traditional Linux init process. It is specially designed to run Kubernetes and does not allow starting arbitrary user services. |
+| [networkd](networkd) | Handles all of the host-level network configuration. Configuration is defined under the `networking` key. |
+| [timed](timed) | Handles the host time synchronization by acting as an NTP client. |
+| [kernel](kernel) | The Linux kernel included with Talos is configured according to the recommendations outlined in the [Kernel Self Protection Project](http://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project). |
+| [routerd](routerd) | Responsible for routing an incoming API request from `apid` to the appropriate backend (e.g. `osd`, `machined` and `timed`). |
+| [trustd](trustd) | To run and operate a Kubernetes cluster, a certain level of trust is required. Based on the concept of a 'Root of Trust', `trustd` is a simple daemon responsible for establishing trust within the system. |
+| [udevd](udevd) | Implementation of `eudev` into `machined`. `eudev` is Gentoo's fork of udev, systemd's device file manager for the Linux kernel. It manages device nodes in `/dev` and handles all user space actions when adding or removing devices. To learn more see the [Gentoo Wiki](https://wiki.gentoo.org/wiki/Eudev). |
diff --git a/docs/website/content/v0.7/en/components/timed.md b/docs/website/content/v0.7/en/components/timed.md
new file mode 100644
index 000000000..511a611b8
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/timed.md
@@ -0,0 +1,5 @@
+---
+title: timed
+---
+
+Timed handles the host time synchronization.
diff --git a/docs/website/content/v0.7/en/components/trustd.md b/docs/website/content/v0.7/en/components/trustd.md
new file mode 100644
index 000000000..f3aade616
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/trustd.md
@@ -0,0 +1,14 @@
+---
+title: 'trustd'
+---
+
+Security is one of the highest priorities within Talos.
+Running a Kubernetes cluster requires a certain level of trust in order to operate.
+For example, orchestrating the bootstrap of a highly available control plane requires the distribution of sensitive PKI data.
+
+To that end, we created `trustd`.
+Based on the concept of a Root of Trust, `trustd` is a simple daemon responsible for establishing trust within the system.
+Once trust is established, various methods become available to the trustee.
+It can, for example, accept a write request from another node to place a file on disk.
+
+Additional methods and capabilities will be added to the `trustd` component in support of new functionality in the rest of the Talos environment.
diff --git a/docs/website/content/v0.7/en/components/udevd.md b/docs/website/content/v0.7/en/components/udevd.md
new file mode 100644
index 000000000..4d90819e4
--- /dev/null
+++ b/docs/website/content/v0.7/en/components/udevd.md
@@ -0,0 +1,5 @@
+---
+title: 'udevd'
+---
+
+Udevd handles the kernel device notifications and sets up the necessary links in `/dev`.
diff --git a/docs/website/content/v0.7/en/configuration/overview.md b/docs/website/content/v0.7/en/configuration/overview.md
new file mode 100644
index 000000000..45498e9b1
--- /dev/null
+++ b/docs/website/content/v0.7/en/configuration/overview.md
@@ -0,0 +1,44 @@
+---
+title: 'Configuration Overview'
+---
+
+In this section, we will step through the configuration of a Talos-based Kubernetes cluster.
+There are three major components we will configure:
+
+- `apid` and `talosctl`
+- the master nodes
+- the worker nodes
+
+Talos enforces a high level of security by using mutual TLS for authentication and authorization.
+
+We recommend that the configuration of Talos be performed by a cluster owner.
+A cluster owner should be a person of authority within an organization, perhaps a director, manager, or senior member of a team.
+They are responsible for storing the root CA and distributing the PKI for authorized cluster administrators.
+
+### Recommended settings
+
+Talos runs great out of the box, but tweaking a few minor settings will make your life a lot easier in the future.
+This is not a requirement, but rather an explanation of some key settings.
+
+#### Endpoint
+
+To configure the `talosctl` endpoint, it is recommended you use a resolvable DNS name.
+This way, if you decide to upgrade to a multi-control-plane cluster, you only have to add the IP address to the hostname configuration.
+The configuration can be done either on a load balancer or simply through DNS.
+
+For example:
+
+> This is in the config file for the cluster, e.g. `init.yaml`, `controlplane.yaml`, and `join.yaml`.
+> For more details, please see: [v1alpha1 endpoint configuration](https://www.talos.dev/docs/v0.7/en/configuration/v1alpha1#controlplane)
+
+``` yaml
+.....
+cluster:
+  controlPlane:
+    endpoint: https://endpoint.example.local:6443
+.....
+```
+
+If you use a DNS name as the endpoint, you can upgrade your Talos cluster to multiple control plane nodes in the future (even if you don't have a multi-control-plane setup from the start).
+Using a DNS name generates the corresponding certificates (Kubernetes and Talos) for the correct hostname.
diff --git a/docs/website/content/v0.7/en/configuration/v1alpha1.md b/docs/website/content/v0.7/en/configuration/v1alpha1.md
new file mode 100644
index 000000000..42d51fb22
--- /dev/null
+++ b/docs/website/content/v0.7/en/configuration/v1alpha1.md
@@ -0,0 +1,1619 @@
+---
+title: v1alpha1
+---
+
+
+Package v1alpha1 configuration file contains all the options available for configuring a machine.
+
+We can generate the files using `talosctl`.
+This configuration is enough to get started in most cases, however it can be customized as needed.
+
+```bash
+talosctl config generate --version v1alpha1
+```
+
+This will generate a machine config for each node type, and a talosconfig.
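+
+Before using a generated config, it can be checked for validity with `talosctl validate`; a quick sketch (the `--mode` value here is an assumption — pick the mode that matches your platform, e.g. `metal`, `cloud`, or `container`):
+
+```bash
+talosctl validate --config init.yaml --mode cloud
+```
+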
+The following is an example of an `init.yaml`:
+
+```yaml
+version: v1alpha1
+machine:
+  type: init
+  token: 5dt69c.npg6duv71zwqhzbg
+  ca:
+    crt:
+    key:
+  certSANs: []
+  kubelet: {}
+  network: {}
+  install:
+    disk: /dev/sda
+    image: docker.io/autonomy/installer:latest
+    bootloader: true
+    wipe: false
+    force: false
+cluster:
+  controlPlane:
+    endpoint: https://1.2.3.4
+  clusterName: example
+  network:
+    cni: ""
+    dnsDomain: cluster.local
+    podSubnets:
+    - 10.244.0.0/16
+    serviceSubnets:
+    - 10.96.0.0/12
+  token: wlzjyw.bei2zfylhs2by0wd
+  certificateKey: 20d9aafb46d6db4c0958db5b3fc481c8c14fc9b1abd8ac43194f4246b77131be
+  aescbcEncryptionSecret: z01mye6j16bspJYtTB/5SFX8j7Ph4JXxM2Xuu4vsBPM=
+  ca:
+    crt:
+    key:
+  apiServer: {}
+  controllerManager: {}
+  scheduler: {}
+  etcd:
+    ca:
+      crt:
+      key:
+```
+
+### Config
+
+#### version
+
+Indicates the schema used to decode the contents.
+
+Type: `string`
+
+Valid Values:
+
+- ``v1alpha1``
+
+#### debug
+
+Enable verbose logging.
+
+Type: `bool`
+
+Valid Values:
+
+- `true`
+- `yes`
+- `false`
+- `no`
+
+#### persist
+
+Indicates whether to pull the machine config upon every boot.
+
+Type: `bool`
+
+Valid Values:
+
+- `true`
+- `yes`
+- `false`
+- `no`
+
+#### machine
+
+Provides machine-specific configuration options.
+
+Type: `MachineConfig`
+
+#### cluster
+
+Provides cluster-specific configuration options.
+
+Type: `ClusterConfig`
+
+---
+
+### MachineConfig
+
+#### type
+
+Defines the role of the machine within the cluster.
+
+##### Init
+
+Init node type designates the first control plane node to come up.
+You can think of it like a bootstrap node.
+This node will perform the initial steps to bootstrap the cluster -- generation of TLS assets, starting of the control plane, etc.
+
+##### Control Plane
+
+Control Plane node type designates the node as a control plane member.
+This means it will host etcd along with the Kubernetes master components such as the API Server, Controller Manager, and Scheduler.
+
+##### Worker
+
+Worker node type designates the node as a worker node.
+This means it will be an available compute node for scheduling workloads.
+
+Type: `string`
+
+Valid Values:
+
+- ``init``
+- ``controlplane``
+- ``join``
+
+#### token
+
+The `token` is used by a machine to join the PKI of the cluster.
+Using this token, a machine will create a certificate signing request (CSR) and request a certificate that will be used as its identity.
+
+Type: `string`
+
+Examples:
+
+```yaml
+token: 328hom.uqjzh6jnn2eie9oi
+```
+
+> Warning: It is important to ensure that this token is correct since a machine's certificate has a short TTL by default.
+
+#### ca
+
+The root certificate authority of the PKI.
+It is composed of a base64-encoded `crt` and `key`.
+
+Type: `PEMEncodedCertificateAndKey`
+
+Examples:
+
+```yaml
+ca:
+  crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJIekNCMHF...
+  key: LS0tLS1CRUdJTiBFRDI1NTE5IFBSSVZBVEUgS0VZLS0tLS0KTUM...
+
+```
+
+#### certSANs
+
+Extra certificate subject alternative names for the machine's certificate.
+By default, all non-loopback interface IPs are automatically added to the certificate's SANs.
+
+Type: `array`
+
+Examples:
+
+```yaml
+certSANs:
+  - 10.0.0.10
+  - 172.16.0.10
+  - 192.168.0.10
+
+```
+
+#### kubelet
+
+Used to provide additional options to the kubelet.
+
+Type: `KubeletConfig`
+
+Examples:
+
+```yaml
+kubelet:
+  image:
+  extraArgs:
+    key: value
+
+```
+
+#### network
+
+Used to configure the machine's network.
+
+Type: `NetworkConfig`
+
+Examples:
+
+```yaml
+network:
+  hostname: worker-1
+  interfaces:
+  nameservers:
+  - 9.8.7.6
+  - 8.7.6.5
+
+```
+
+#### disks
+
+Used to partition, format and mount additional disks.
+Since the rootfs is read-only with the exception of `/var`, mounts are only valid if they are under `/var`.
+Note that the partitioning and formatting are done only once, if and only if no existing partitions are found.
+If `size:` is omitted, the partition is sized to occupy the full disk.
+
+Type: `array`
+
+Examples:
+
+```yaml
+disks:
+  - device: /dev/sdb
+    partitions:
+      - mountpoint: /var/lib/extra
+        size: 10000000000
+
+```
+
+> Note: `size` is in units of bytes.
+
+#### install
+
+Used to provide instructions for bare-metal installations.
+
+Type: `InstallConfig`
+
+Examples:
+
+```yaml
+install:
+  disk: /dev/sda
+  extraKernelArgs:
+    - option=value
+  image: docker.io/autonomy/installer:latest
+  bootloader: true
+  wipe: false
+  force: false
+
+```
+
+#### files
+
+Allows the addition of user-specified files.
+The value of `op` can be `create`, `overwrite`, or `append`.
+In the case of `create`, `path` must not exist.
+In the case of `overwrite` and `append`, `path` must be a valid file.
+If an `op` value of `append` is used, the existing file will be appended to.
+Note that the file contents are not required to be base64 encoded.
+
+Type: `array`
+
+Examples:
+
+```yaml
+files:
+  - content: |
+      ...
+    permissions: 0666
+    path: /tmp/file.txt
+    op: append
+
+```
+
+> Note: The specified `path` is relative to `/var`.
+
+#### env
+
+The `env` field allows for the addition of environment variables to a machine.
+All environment variables are set on the machine in addition to every service.
+
+Type: `Env`
+
+Valid Values:
+
+- ``GRPC_GO_LOG_VERBOSITY_LEVEL``
+- ``GRPC_GO_LOG_SEVERITY_LEVEL``
+- ``http_proxy``
+- ``https_proxy``
+- ``no_proxy``
+
+Examples:
+
+```yaml
+env:
+  GRPC_GO_LOG_VERBOSITY_LEVEL: "99"
+  GRPC_GO_LOG_SEVERITY_LEVEL: info
+  https_proxy: http://SERVER:PORT/
+
+```
+
+```yaml
+env:
+  GRPC_GO_LOG_SEVERITY_LEVEL: error
+  https_proxy: https://USERNAME:PASSWORD@SERVER:PORT/
+
+```
+
+```yaml
+env:
+  https_proxy: http://DOMAIN\\USERNAME:PASSWORD@SERVER:PORT/
+
+```
+
+#### time
+
+Used to configure the machine's time settings.
+
+Type: `TimeConfig`
+
+Examples:
+
+```yaml
+time:
+  servers:
+    - time.cloudflare.com
+
+```
+
+#### sysctls
+
+Used to configure the machine's sysctls.
+
+Type: `map`
+
+Examples:
+
+```yaml
+sysctls:
+  kernel.domainname: talos.dev
+  net.ipv4.ip_forward: "0"
+
+```
+
+#### registries
+
+Used to configure the machine's container image registry mirrors.
+
+Automatically generates matching CRI configuration for registry mirrors.
+
+Section `mirrors` allows redirecting requests for images to a non-default registry,
+which might be a local registry or a caching mirror.
+
+Section `config` provides a way to authenticate to the registry with a TLS client
+identity, provide the registry CA, or supply authentication information.
+Authentication information has the same meaning as the corresponding field in `.docker/config.json`.
+
+See also matching configuration for the [CRI containerd plugin](https://github.com/containerd/cri/blob/master/docs/registry.md).
+
+Type: `RegistriesConfig`
+
+Examples:
+
+```yaml
+registries:
+  mirrors:
+    docker.io:
+      endpoints:
+        - https://registry-1.docker.io
+    '*':
+      endpoints:
+        - http://some.host:123/
+  config:
+    "some.host:123":
+      tls:
+        CA: ... # base64-encoded CA certificate in PEM format
+        clientIdentity:
+          cert: ... 
# base64-encoded client certificate in PEM format + key: ... # base64-encoded client key in PEM format + auth: + username: ... + password: ... + auth: ... + identityToken: ... + +``` + +--- + +### ClusterConfig + +#### controlPlane + +Provides control plane specific configuration options. + +Type: `ControlPlaneConfig` + +Examples: + +```yaml +controlPlane: + endpoint: https://1.2.3.4 + localAPIServerPort: 443 + +``` + +#### clusterName + +Configures the cluster's name. + +Type: `string` + +#### network + +Provides cluster network configuration. + +Type: `ClusterNetworkConfig` + +Examples: + +```yaml +network: + cni: + name: flannel + dnsDomain: cluster.local + podSubnets: + - 10.244.0.0/16 + serviceSubnets: + - 10.96.0.0/12 + +``` + +#### token + +The [bootstrap token](https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/). + +Type: `string` + +Examples: + +```yaml +wlzjyw.bei2zfylhs2by0wd +``` + +#### aescbcEncryptionSecret + +The key used for the [encryption of secret data at rest](https://kubernetes.io/docs/tasks/administer-cluster/encrypt-data/). + +Type: `string` + +Examples: + +```yaml +z01mye6j16bspJYtTB/5SFX8j7Ph4JXxM2Xuu4vsBPM= +``` + +#### ca + +The base64 encoded root certificate authority used by Kubernetes. + +Type: `PEMEncodedCertificateAndKey` + +Examples: + +```yaml +ca: + crt: LS0tLS1CRUdJTiBDRV... + key: LS0tLS1CRUdJTiBSU0... + +``` + +#### apiServer + +API server specific configuration options. + +Type: `APIServerConfig` + +Examples: + +```yaml +apiServer: + image: ... + extraArgs: + key: value + certSANs: + - 1.2.3.4 + - 5.6.7.8 + +``` + +#### controllerManager + +Controller manager server specific configuration options. + +Type: `ControllerManagerConfig` + +Examples: + +```yaml +controllerManager: + image: ... + extraArgs: + key: value + +``` + +#### proxy + +Kube-proxy server-specific configuration options + +Type: `ProxyConfig` + +Examples: + +```yaml +proxy: + mode: ipvs + extraArgs: + key: value + +``` + +#### scheduler + +Scheduler server specific configuration options. + +Type: `SchedulerConfig` + +Examples: + +```yaml +scheduler: + image: ... + extraArgs: + key: value + +``` + +#### etcd + +Etcd specific configuration options. + +Type: `EtcdConfig` + +Examples: + +```yaml +etcd: + ca: + crt: LS0tLS1CRUdJTiBDRV... + key: LS0tLS1CRUdJTiBSU0... + image: ... + +``` + +#### podCheckpointer + +Pod Checkpointer specific configuration options. + +Type: `PodCheckpointer` + +Examples: + +```yaml +podCheckpointer: + image: ... + +``` + +#### coreDNS + +Core DNS specific configuration options. + +Type: `CoreDNS` + +Examples: + +```yaml +coreDNS: + image: ... + +``` + +#### extraManifests + +A list of urls that point to additional manifests. +These will get automatically deployed by bootkube. + +Type: `array` + +Examples: + +```yaml +extraManifests: + - "https://www.mysweethttpserver.com/manifest1.yaml" + - "https://www.mysweethttpserver.com/manifest2.yaml" + +``` + +#### extraManifestHeaders + +A map of key value pairs that will be added while fetching the ExtraManifests. + +Type: `map` + +Examples: + +```yaml +extraManifestHeaders: + Token: "1234567" + X-ExtraInfo: info + +``` + +#### adminKubeconfig + +Settings for admin kubeconfig generation. +Certificate lifetime can be configured. + +Type: `AdminKubeconfigConfig` + +Examples: + +```yaml +adminKubeconfig: + certLifetime: 1h + +``` + +--- + +### KubeletConfig + +#### image + +The `image` field is an optional reference to an alternative kubelet image. 
+
+Type: `string`
+
+Examples:
+
+```yaml
+image: docker.io/<org>/kubelet:latest
+```
+
+#### extraArgs
+
+The `extraArgs` field is used to provide additional flags to the kubelet.
+
+Type: `map`
+
+Examples:
+
+```yaml
+extraArgs:
+  key: value
+
+```
+
+#### extraMounts
+
+The `extraMounts` field is used to add additional mounts to the kubelet container.
+
+Type: `array`
+
+Examples:
+
+```yaml
+extraMounts:
+  - source: /var/lib/example
+    destination: /var/lib/example
+    type: bind
+    options:
+      - rshared
+      - ro
+
+```
+
+---
+
+### NetworkConfig
+
+#### hostname
+
+Used to statically set the hostname for the host.
+
+Type: `string`
+
+#### interfaces
+
+`interfaces` is used to define the network interface configuration.
+By default, all network interfaces will attempt DHCP discovery.
+This can be further tuned through this configuration parameter.
+
+##### machine.network.interfaces.interface
+
+This is the interface name that should be configured.
+
+##### machine.network.interfaces.cidr
+
+`cidr` is used to specify a static IP address for the interface.
+This should be in proper CIDR notation ( `192.168.2.5/24` ).
+
+> Note: This option is mutually exclusive with DHCP.
+
+##### machine.network.interfaces.dhcp
+
+`dhcp` is used to specify that this device should be configured via DHCP.
+
+The following DHCP options are supported:
+
+- `OptionClasslessStaticRoute`
+- `OptionDomainNameServer`
+- `OptionDNSDomainSearchList`
+- `OptionHostName`
+
+> Note: This option is mutually exclusive with CIDR.
+
+##### machine.network.interfaces.ignore
+
+`ignore` is used to exclude a specific interface from configuration.
+This parameter is optional.
+
+##### machine.network.interfaces.dummy
+
+`dummy` is used to specify that this interface should be a virtual-only, dummy interface.
+This parameter is optional.
+
+##### machine.network.interfaces.routes
+
+`routes` is used to specify static routes that may be necessary.
+This parameter is optional.
+
+Routes can be repeated and include a `Network` and a `Gateway` field.
+
+Type: `array`
+
+#### nameservers
+
+Used to statically set the nameservers for the host.
+Defaults to `1.1.1.1` and `8.8.8.8`.
+
+Type: `array`
+
+#### extraHostEntries
+
+Allows for extra entries to be added to the `/etc/hosts` file.
+
+Type: `array`
+
+Examples:
+
+```yaml
+extraHostEntries:
+  - ip: 192.168.1.100
+    aliases:
+      - test
+      - test.domain.tld
+
+```
+
+---
+
+### InstallConfig
+
+#### disk
+
+The disk used to install the bootloader and ephemeral partitions.
+
+Type: `string`
+
+Examples:
+
+```yaml
+/dev/sda
+```
+
+```yaml
+/dev/nvme0
+```
+
+#### extraKernelArgs
+
+Allows for supplying extra kernel args to the bootloader config.
+
+Type: `array`
+
+Examples:
+
+```yaml
+extraKernelArgs:
+  - a=b
+
+```
+
+#### image
+
+Allows for supplying the image used to perform the installation.
+
+Type: `string`
+
+Examples:
+
+```yaml
+image: docker.io/<org>/installer:latest
+
+```
+
+#### bootloader
+
+Indicates if a bootloader should be installed.
+
+Type: `bool`
+
+Valid Values:
+
+- `true`
+- `yes`
+- `false`
+- `no`
+
+#### wipe
+
+Indicates if zeroes should be written to the `disk` before performing an installation.
+Defaults to `true`.
+
+Type: `bool`
+
+Valid Values:
+
+- `true`
+- `yes`
+- `false`
+- `no`
+
+#### force
+
+Indicates if filesystems should be forcefully created.
+
+Type: `bool`
+
+Valid Values:
+
+- `true`
+- `yes`
+- `false`
+- `no`
+
+---
+
+### TimeConfig
+
+#### servers
+
+Specifies time (NTP) servers to use for setting the system time.
+Defaults to `pool.ntp.org`.
+
+> Note: This parameter only supports a single time server.
+
+Type: `array`
+
+---
+
+### RegistriesConfig
+
+#### mirrors
+
+Specifies mirror configuration for each registry.
+This setting allows using local pull-through caching registries,
+air-gapped installations, etc.
+
+The registry name is the first segment of the image identifier, with 'docker.io'
+being the default.
+Name '*' catches any registry names not specified explicitly.
+
+Type: `map`
+
+#### config
+
+Specifies TLS & auth configuration for HTTPS image registries.
+Mutual TLS can be enabled with the 'clientIdentity' option.
+
+TLS configuration can be skipped if the registry has a trusted
+server certificate.
+
+Type: `map`
+
+---
+
+### PodCheckpointer
+
+#### image
+
+The `image` field is an override to the default pod-checkpointer image.
+
+Type: `string`
+
+---
+
+### CoreDNS
+
+#### image
+
+The `image` field is an override to the default coredns image.
+
+Type: `string`
+
+---
+
+### Endpoint
+
+---
+
+### ControlPlaneConfig
+
+#### endpoint
+
+Endpoint is the canonical controlplane endpoint, which can be an IP address or a DNS hostname.
+It is single-valued, and may optionally include a port number.
+
+Type: `Endpoint`
+
+Examples:
+
+```yaml
+https://1.2.3.4:443
+```
+
+#### localAPIServerPort
+
+The port that the API server listens on internally.
+This may be different from the port portion listed in the endpoint field above.
+The default is 6443.
+
+Type: `int`
+
+---
+
+### APIServerConfig
+
+#### image
+
+The container image used in the API server manifest.
+
+Type: `string`
+
+#### extraArgs
+
+Extra arguments to supply to the API server.
+
+Type: `map`
+
+#### certSANs
+
+Extra certificate subject alternative names for the API server's certificate.
+
+Type: `array`
+
+---
+
+### ControllerManagerConfig
+
+#### image
+
+The container image used in the controller manager manifest.
+
+Type: `string`
+
+#### extraArgs
+
+Extra arguments to supply to the controller manager.
+
+Type: `map`
+
+---
+
+### ProxyConfig
+
+#### image
+
+The container image used in the kube-proxy manifest.
+
+Type: `string`
+
+#### mode
+
+The proxy mode of kube-proxy.
+By default, this is 'iptables'.
+
+Type: `string`
+
+#### extraArgs
+
+Extra arguments to supply to kube-proxy.
+
+Type: `map`
+
+---
+
+### SchedulerConfig
+
+#### image
+
+The container image used in the scheduler manifest.
+
+Type: `string`
+
+#### extraArgs
+
+Extra arguments to supply to the scheduler.
+
+Type: `map`
+
+---
+
+### EtcdConfig
+
+#### image
+
+The container image used to create the etcd service.
+
+Type: `string`
+
+#### ca
+
+The `ca` is the root certificate authority of the PKI.
+It is composed of a base64-encoded `crt` and `key`.
+
+Type: `PEMEncodedCertificateAndKey`
+
+Examples:
+
+```yaml
+ca:
+  crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJIekNCMHF...
+  key: LS0tLS1CRUdJTiBFRDI1NTE5IFBSSVZBVEUgS0VZLS0tLS0KTUM...
+
+```
+
+#### extraArgs
+
+Extra arguments to supply to etcd.
+Note that the following args are not allowed:
+
+- `name`
+- `data-dir`
+- `initial-cluster-state`
+- `listen-peer-urls`
+- `listen-client-urls`
+- `cert-file`
+- `key-file`
+- `trusted-ca-file`
+- `peer-client-cert-auth`
+- `peer-cert-file`
+- `peer-trusted-ca-file`
+- `peer-key-file`
+
+Type: `map`
+
+Examples:
+
+```yaml
+extraArgs:
+  initial-cluster: https://1.2.3.4:2380
+  advertise-client-urls: https://1.2.3.4:2379
+
+```
+
+---
+
+### ClusterNetworkConfig
+
+#### cni
+
+The CNI used.
+Composed of "name" and "urls".
+The "name" key only supports upstream bootkube options of "flannel" or "custom". +URLs is only used if name is equal to "custom". +URLs should point to a single yaml file that will get deployed. +Empty struct or any other name will default to bootkube's flannel. + +Type: `CNIConfig` + +Examples: + +```yaml +cni: + name: "custom" + urls: + - "https://www.mysweethttpserver.com/supersecretcni.yaml" + +``` + +#### dnsDomain + +The domain used by Kubernetes DNS. +The default is `cluster.local` + +Type: `string` + +Examples: + +```yaml +cluser.local +``` + +#### podSubnets + +The pod subnet CIDR. + +Type: `array` + +Examples: + +```yaml +podSubnets: + - 10.244.0.0/16 + +``` + +#### serviceSubnets + +The service subnet CIDR. + +Type: `array` + +Examples: + +```yaml +serviceSubnets: + - 10.96.0.0/12 + +``` + +--- + +### CNIConfig + +#### name + +Name of CNI to use. + +Type: `string` + +#### urls + +URLs containing manifests to apply for CNI. + +Type: `array` + +--- + +### AdminKubeconfigConfig + +#### certLifetime + +Admin kubeconfig certificate lifetime (default is 1 year). +Field format accepts any Go time.Duration format ('1h' for one hour, '10m' for ten minutes). + +Type: `Duration` + +--- + +### MachineDisk + +#### device + +The name of the disk to use. +Type: `string` + +#### partitions + +A list of partitions to create on the disk. +Type: `array` + +--- + +### DiskPartition + +#### size + +This size of the partition in bytes. + +Type: `uint` + +#### mountpoint + +Where to mount the partition. +Type: `string` + +--- + +### MachineFile + +#### content + +The contents of file. +Type: `string` + +#### permissions + +The file's permissions in octal. +Type: `FileMode` + +#### path + +The path of the file. +Type: `string` + +#### op + +The operation to use +Type: `string` + +Valid Values: + +- `create` +- `append` + +--- + +### ExtraHost + +#### ip + +The IP of the host. +Type: `string` + +#### aliases + +The host alias. +Type: `array` + +--- + +### Device + +#### interface + +The interface name. +Type: `string` + +#### cidr + +The CIDR to use. +Type: `string` + +#### routes + +A list of routes associated with the interface. +Type: `array` + +#### bond + +Bond specific options. +Type: `Bond` + +#### vlans + +VLAN specific options. +Type: `array` + +#### mtu + +The interface's MTU. +Type: `int` + +#### dhcp + +Indicates if DHCP should be used. +Type: `bool` + +#### ignore + +Indicates if the interface should be ignored. +Type: `bool` + +#### dummy + +Indicates if the interface is a dummy interface. +Type: `bool` + +--- + +### Bond + +#### interfaces + +The interfaces that make up the bond. +Type: `array` + +#### arpIPTarget + +A bond option. +Please see the official kernel documentation. + +Type: `array` + +#### mode + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### xmitHashPolicy + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### lacpRate + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### adActorSystem + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### arpValidate + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### arpAllTargets + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### primary + +A bond option. +Please see the official kernel documentation. + +Type: `string` + +#### primaryReselect + +A bond option. +Please see the official kernel documentation. 
+
+Type: `string`
+
+#### failOverMac
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `string`
+
+#### adSelect
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `string`
+
+#### miimon
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### updelay
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### downdelay
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### arpInterval
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### resendIgmp
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### minLinks
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### lpInterval
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### packetsPerSlave
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+#### numPeerNotif
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint8`
+
+#### tlbDynamicLb
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint8`
+
+#### allSlavesActive
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint8`
+
+#### useCarrier
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `bool`
+
+#### adActorSysPrio
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint16`
+
+#### adUserPortKey
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint16`
+
+#### peerNotifyDelay
+
+A bond option.
+Please see the official kernel documentation.
+
+Type: `uint32`
+
+---
+
+### Vlan
+
+#### cidr
+
+The CIDR to use.
+Type: `string`
+
+#### routes
+
+A list of routes associated with the VLAN.
+Type: `array`
+
+#### dhcp
+
+Indicates if DHCP should be used.
+Type: `bool`
+
+#### vlanId
+
+The VLAN's ID.
+Type: `uint16`
+
+---
+
+### Route
+
+#### network
+
+The route's network.
+Type: `string`
+
+#### gateway
+
+The route's gateway.
+Type: `string`
+
+---
+
+### RegistryMirrorConfig
+
+#### endpoints
+
+List of endpoints (URLs) for registry mirrors to use.
+Each endpoint configures the HTTP/HTTPS access mode, host name,
+port and path (if the path is not set, it defaults to `/v2`).
+
+Type: `array`
+
+---
+
+### RegistryConfig
+
+#### tls
+
+The TLS configuration for this registry.
+Type: `RegistryTLSConfig`
+
+#### auth
+
+The auth configuration for this registry.
+Type: `RegistryAuthConfig`
+
+---
+
+### RegistryAuthConfig
+
+#### username
+
+Optional registry authentication.
+The meaning of each field is the same as the corresponding field in `.docker/config.json`.
+
+Type: `string`
+
+#### password
+
+Optional registry authentication.
+The meaning of each field is the same as the corresponding field in `.docker/config.json`.
+
+Type: `string`
+
+#### auth
+
+Optional registry authentication.
+The meaning of each field is the same as the corresponding field in `.docker/config.json`.
+
+Type: `string`
+
+#### identityToken
+
+Optional registry authentication.
+The meaning of each field is the same as the corresponding field in `.docker/config.json`.
+
+Type: `string`
+
+---
+
+### RegistryTLSConfig
+
+#### clientIdentity
+
+Enable mutual TLS authentication with the registry.
+Client certificate and key should be base64-encoded.
+
+Type: `PEMEncodedCertificateAndKey`
+
+Examples:
+
+```yaml
+clientIdentity:
+  crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJIekNCMHF...
+  key: LS0tLS1CRUdJTiBFRDI1NTE5IFBSSVZBVEUgS0VZLS0tLS0KTUM...
+
+```
+
+#### ca
+
+CA registry certificate to add to the list of trusted certificates.
+Certificate should be base64-encoded.
+
+Type: `array`
+
+#### insecureSkipVerify
+
+Skip TLS server certificate verification (not recommended).
+
+Type: `bool`
+
+---
diff --git a/docs/website/content/v0.7/en/customization/containerd.md b/docs/website/content/v0.7/en/customization/containerd.md
new file mode 100644
index 000000000..db9b9a96c
--- /dev/null
+++ b/docs/website/content/v0.7/en/customization/containerd.md
@@ -0,0 +1,33 @@
+---
+title: 'Customizing containerd'
+---
+
+We offer the ability to use custom configuration files for containerd.
+The base containerd configuration expects to merge in any additional configs present in `/var/cri/conf.d/*.toml`.
+
+## An Example of Exposing Metrics
+
+Into each machine config, add the following:
+
+```yaml
+machine:
+  ...
+  files:
+    - content: |
+        [metrics]
+          address = "0.0.0.0:11234"
+      path: /var/cri/conf.d/metrics.toml
+      op: create
+```
+
+Create a cluster as usual and see that metrics are now present on this port:
+
+```bash
+$ curl 127.0.0.1:11234/v1/metrics
+# HELP container_blkio_io_service_bytes_recursive_bytes The blkio io service bytes recursive
+# TYPE container_blkio_io_service_bytes_recursive_bytes gauge
+container_blkio_io_service_bytes_recursive_bytes{container_id="0677d73196f5f4be1d408aab1c4125cf9e6c458a4bea39e590ac779709ffbe14",device="/dev/dm-0",major="253",minor="0",namespace="k8s.io",op="Async"} 0
+container_blkio_io_service_bytes_recursive_bytes{container_id="0677d73196f5f4be1d408aab1c4125cf9e6c458a4bea39e590ac779709ffbe14",device="/dev/dm-0",major="253",minor="0",namespace="k8s.io",op="Discard"} 0
+...
+...
+```
diff --git a/docs/website/content/v0.7/en/customization/kernel.md b/docs/website/content/v0.7/en/customization/kernel.md
new file mode 100644
index 000000000..336b63052
--- /dev/null
+++ b/docs/website/content/v0.7/en/customization/kernel.md
@@ -0,0 +1,21 @@
+---
+title: 'Kernel'
+---
+
+## Customizing the Kernel
+
+```docker
+FROM scratch AS customization
+COPY --from=<custom kernel image> /lib/modules /lib/modules
+
+FROM docker.io/andrewrynhard/installer:latest
+COPY --from=<custom kernel image> /boot/vmlinuz /usr/install/vmlinuz
+```
+
+```bash
+docker build --build-arg RM="/lib/modules" -t talos-installer .
+```
+
+> Note: You can use the `--squash` flag to create smaller images.
+
+Now that we have a custom installer, we can build Talos for the specific platform we wish to deploy to.
diff --git a/docs/website/content/v0.7/en/customization/overview.md b/docs/website/content/v0.7/en/customization/overview.md
new file mode 100644
index 000000000..025af1187
--- /dev/null
+++ b/docs/website/content/v0.7/en/customization/overview.md
@@ -0,0 +1,60 @@
+---
+title: 'Customization'
+---
+
+The installer image contains [`ONBUILD`](https://docs.docker.com/engine/reference/builder/#onbuild) instructions that handle the following:
+
+- the decompression and unpacking of the `initramfs.xz`
+- the unsquashing of the rootfs
+- the copying of new rootfs files
+- the squashing of the new rootfs
+- and the packing and compression of the new `initramfs.xz`
+
+When used as a base image, the installer will perform the above steps automatically with the requirement that a `customization` stage be defined in the `Dockerfile`.
+
+For example, say we have an image that contains the contents of a library we wish to add to the Talos rootfs.
+We need to define a stage with the name `customization`:
+
+```docker
+FROM scratch AS customization
+COPY --from=<image> <src> <dest>
+```
+
+Using a multi-stage `Dockerfile` we can define the `customization` stage and build `FROM` the installer image:
+
+```docker
+FROM scratch AS customization
+COPY --from=<image> <src> <dest>
+
+FROM docker.io/autonomy/installer:latest
+```
+
+When building the image, the `customization` stage will automatically be copied into the rootfs.
+The `customization` stage is not limited to a single `COPY` instruction.
+In fact, you can do whatever you would like in this stage, but keep in mind that everything in `/` will be copied into the rootfs.
+
+> Note: `<dest>` is the path relative to the rootfs that you wish to place the contents of `<src>`.
+
+To build the image, run:
+
+```bash
+docker build --squash -t <organization>/installer:latest .
+```
+
+In the case that you need to perform some cleanup _before_ adding additional files to the rootfs, you can specify the `RM` [build-time variable](https://docs.docker.com/engine/reference/commandline/build/#set-build-time-variables---build-arg):
+
+```bash
+docker build --squash --build-arg RM="[<path> ...]" -t <organization>/installer:latest .
+```
+
+This will perform a `rm -rf` on the specified paths relative to the rootfs.
+
+> Note: `RM` must be a whitespace delimited list.
+
+The resulting image can be used to:
+
+- generate an image for any of the supported providers
+- perform bare-metal installs
+- perform upgrades
+
+We will step through common customizations in the remainder of this section.
diff --git a/docs/website/content/v0.7/en/customization/proxy.md b/docs/website/content/v0.7/en/customization/proxy.md
new file mode 100644
index 000000000..1fa6618d6
--- /dev/null
+++ b/docs/website/content/v0.7/en/customization/proxy.md
@@ -0,0 +1,51 @@
+---
+title: 'Running Behind a Corporate Proxy'
+---
+
+## Appending the Certificate Authority of MITM Proxies
+
+Add the PEM encoded certificate to each machine's config:
+
+```yaml
+machine:
+  ...
+  files:
+    - content: |
+        -----BEGIN CERTIFICATE-----
+        ...
+        -----END CERTIFICATE-----
+      permissions: 0644
+      path: /etc/ssl/certs/ca-certificates
+      op: append
+```
+
+## Configuring a Machine to Use the Proxy
+
+To make use of a proxy:
+
+```yaml
+machine:
+  env:
+    http_proxy: <http proxy>
+    https_proxy: <https proxy>
+    no_proxy: <no proxy>
+```
+
+Additionally, configure the DNS `nameservers`, and NTP `servers`:
+
+```yaml
+machine:
+  env:
+  ...
+  time:
+    servers:
+      - <server 1>
+      - <server 2>
+      - <server 3>
+  ...
+  network:
+    nameservers:
+      - <nameserver 1>
+      - <nameserver 2>
+      - <nameserver 3>
+```
diff --git a/docs/website/content/v0.7/en/guides/cloud/aws.md b/docs/website/content/v0.7/en/guides/cloud/aws.md
new file mode 100644
index 000000000..7fdf8ef3f
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/cloud/aws.md
@@ -0,0 +1,245 @@
+---
+title: 'AWS'
+---
+
+## Creating a Cluster via the AWS CLI
+
+In this guide we will create an HA Kubernetes cluster with 3 worker nodes.
+We assume an existing VPC, and some familiarity with AWS.
+If you need more information on AWS specifics, please see the [official AWS documentation](https://docs.aws.amazon.com).
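+
+### Environment Setup
+
+The commands below reference a handful of environment variables; a minimal sketch of setting them up follows (all names and values here are assumptions — substitute your own region, VPC, CIDR block, and bucket):
+
+```bash
+# Region to deploy into
+export REGION="us-east-1"
+
+# Existing VPC to use
+export VPC="vpc-01234567"
+
+# CIDR block for the new subnet
+export CIDR_BLOCK="10.1.0.0/24"
+
+# S3 bucket used for the image import below
+export BUCKET="talos-aws-tutorial-bucket"
+```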
+
+### Create the Subnet
+
+```bash
+aws ec2 create-subnet \
+    --region $REGION \
+    --vpc-id $VPC \
+    --cidr-block ${CIDR_BLOCK}
+```
+
+### Create the AMI
+
+#### Prepare the Import Prerequisites
+
+##### Create the S3 Bucket
+
+```bash
+aws s3api create-bucket \
+    --bucket $BUCKET \
+    --create-bucket-configuration LocationConstraint=$REGION \
+    --acl private
+```
+
+##### Create the `vmimport` Role
+
+In order to create an AMI, ensure that the `vmimport` role exists as described in the [official AWS documentation](https://docs.aws.amazon.com/vm-import/latest/userguide/vmie_prereqs.html#vmimport-role).
+
+Note that the role should be associated with the S3 bucket we created above.
+
+##### Create the Image Snapshot
+
+First, download the AWS image from a Talos release:
+
+```bash
+curl -L https://github.com/talos-systems/talos/releases/latest/download/aws.tar.gz | tar -xvz
+```
+
+Copy the RAW disk to S3 and import it as a snapshot:
+
+```bash
+aws s3 cp disk.raw s3://$BUCKET/talos-aws-tutorial.raw
+aws ec2 import-snapshot \
+    --region $REGION \
+    --description "Talos kubernetes tutorial" \
+    --disk-container "Format=raw,UserBucket={S3Bucket=$BUCKET,S3Key=talos-aws-tutorial.raw}"
+```
+
+Save the `SnapshotId`, as we will need it once the import is done.
+To check on the status of the import, run:
+
+```bash
+aws ec2 describe-import-snapshot-tasks \
+    --region $REGION \
+    --import-task-ids <import-task-id>
+```
+
+Once the `SnapshotTaskDetail.Status` indicates `completed`, we can register the image.
+
+##### Register the Image
+
+```bash
+aws ec2 register-image \
+    --region $REGION \
+    --block-device-mappings "DeviceName=/dev/xvda,VirtualName=talos,Ebs={DeleteOnTermination=true,SnapshotId=$SNAPSHOT,VolumeSize=4,VolumeType=gp2}" \
+    --root-device-name /dev/xvda \
+    --virtualization-type hvm \
+    --architecture x86_64 \
+    --ena-support \
+    --name talos-aws-tutorial-ami
+```
+
+We now have an AMI we can use to create our cluster.
+Save the AMI ID, as we will need it when we create EC2 instances.
+
+### Create a Security Group
+
+```bash
+aws ec2 create-security-group \
+    --region $REGION \
+    --group-name talos-aws-tutorial-sg \
+    --description "Security Group for EC2 instances to allow ports required by Talos"
+```
+
+Using the security group ID from above, allow all internal traffic within the same security group:
+
+```bash
+aws ec2 authorize-security-group-ingress \
+    --region $REGION \
+    --group-name talos-aws-tutorial-sg \
+    --protocol all \
+    --port 0 \
+    --group-id $SECURITY_GROUP \
+    --source-group $SECURITY_GROUP
+```
+
+and expose the Talos and Kubernetes APIs:
+
+```bash
+aws ec2 authorize-security-group-ingress \
+    --region $REGION \
+    --group-name talos-aws-tutorial-sg \
+    --protocol tcp \
+    --port 6443 \
+    --cidr 0.0.0.0/0 \
+    --group-id $SECURITY_GROUP
+aws ec2 authorize-security-group-ingress \
+    --region $REGION \
+    --group-name talos-aws-tutorial-sg \
+    --protocol tcp \
+    --port 50000-50001 \
+    --cidr 0.0.0.0/0 \
+    --group-id $SECURITY_GROUP
+```
+
+### Create a Load Balancer
+
+```bash
+aws elbv2 create-load-balancer \
+    --region $REGION \
+    --name talos-aws-tutorial-lb \
+    --type network --subnets $SUBNET
+```
+
+Take note of the DNS name and ARN.
+We will need these soon.
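+
+If you need to look them up again later, one way to do so (assuming the load balancer name used above):
+
+```bash
+aws elbv2 describe-load-balancers \
+    --region $REGION \
+    --names talos-aws-tutorial-lb \
+    --query "LoadBalancers[0].[DNSName,LoadBalancerArn]" \
+    --output text
+```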
+
+### Create the Machine Configuration Files
+
+#### Generating Base Configurations
+
+Using the DNS name of the load balancer created earlier, generate the base configuration files for the Talos machines:
+
+```bash
+$ talosctl gen config talos-k8s-aws-tutorial https://<load balancer IP or DNS>:<port>
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+At this point, you can modify the generated configs to your liking.
+
+#### Validate the Configuration Files
+
+```bash
+$ talosctl validate --config init.yaml --mode cloud
+init.yaml is valid for cloud mode
+$ talosctl validate --config controlplane.yaml --mode cloud
+controlplane.yaml is valid for cloud mode
+$ talosctl validate --config join.yaml --mode cloud
+join.yaml is valid for cloud mode
+```
+
+### Create the EC2 Instances
+
+> Note: There is a known issue that prevents Talos from running on T2 instance types.
+> Please use T3 if you need burstable instance types.
+
+#### Create the Bootstrap Node
+
+```bash
+aws ec2 run-instances \
+    --region $REGION \
+    --image-id $AMI \
+    --count 1 \
+    --instance-type t3.small \
+    --user-data file://init.yaml \
+    --subnet-id $SUBNET \
+    --security-group-ids $SECURITY_GROUP
+```
+
+#### Create the Remaining Control Plane Nodes
+
+```bash
+aws ec2 run-instances \
+    --region $REGION \
+    --image-id $AMI \
+    --count 2 \
+    --instance-type t3.small \
+    --user-data file://controlplane.yaml \
+    --subnet-id $SUBNET \
+    --security-group-ids $SECURITY_GROUP
+```
+
+#### Create the Worker Nodes
+
+```bash
+aws ec2 run-instances \
+    --region $REGION \
+    --image-id $AMI \
+    --count 3 \
+    --instance-type t3.small \
+    --user-data file://join.yaml \
+    --subnet-id $SUBNET \
+    --security-group-ids $SECURITY_GROUP
+```
+
+### Configure the Load Balancer
+
+```bash
+aws elbv2 create-target-group \
+    --region $REGION \
+    --name talos-aws-tutorial-tg \
+    --protocol TCP \
+    --port 6443 \
+    --vpc-id $VPC
+```
+
+Now, using the target group's ARN, register the control plane nodes:
+
+```bash
+aws elbv2 register-targets \
+    --region $REGION \
+    --target-group-arn $TARGET_GROUP_ARN \
+    --targets Id=$CP_NODE_1 Id=$CP_NODE_2 Id=$CP_NODE_3
+```
+
+Using the ARNs of the load balancer and target group from previous steps, create the listener:
+
+```bash
+aws elbv2 create-listener \
+    --region $REGION \
+    --load-balancer-arn $LOAD_BALANCER_ARN \
+    --protocol TCP \
+    --port 443 \
+    --default-actions Type=forward,TargetGroupArn=$TARGET_GROUP_ARN
+```
+
+### Retrieve the `kubeconfig`
+
+At this point we can retrieve the admin `kubeconfig` by running:
+
+```bash
+talosctl --talosconfig talosconfig config endpoint <control plane 1 IP>
+talosctl --talosconfig talosconfig kubeconfig .
+```
diff --git a/docs/website/content/v0.7/en/guides/cloud/azure.md b/docs/website/content/v0.7/en/guides/cloud/azure.md
new file mode 100644
index 000000000..2369121ea
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/cloud/azure.md
@@ -0,0 +1,280 @@
+---
+title: 'Azure'
+---
+
+## Creating a Cluster via the CLI
+
+In this guide we will create an HA Kubernetes cluster with 1 worker node.
+We assume existing [Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/), and some familiarity with Azure.
+If you need more information on Azure specifics, please see the [official Azure documentation](https://docs.microsoft.com/en-us/azure/).
+
+### Environment Setup
+
+We'll make use of the following environment variables throughout the setup.
+Edit the variables below with your correct information.
+
+```bash
+# Storage account to use
+export STORAGE_ACCOUNT="StorageAccountName"
+
+# Storage container to upload to
+export STORAGE_CONTAINER="StorageContainerName"
+
+# Resource group name
+export GROUP="ResourceGroupName"
+
+# Location
+export LOCATION="centralus"
+
+# Get storage account connection string based on info above
+export CONNECTION=$(az storage account show-connection-string \
+                    -n $STORAGE_ACCOUNT \
+                    -g $GROUP \
+                    -o tsv)
+```
+
+### Create the Image
+
+First, download the Azure image from a [Talos release](https://github.com/talos-systems/talos/releases).
+Once downloaded, untar with `tar -xvf /path/to/azure.tar.gz`.
+
+#### Upload the VHD
+
+Once you have pulled down the image, you can upload it to blob storage with:
+
+```bash
+az storage blob upload \
+  --connection-string $CONNECTION \
+  --container-name $STORAGE_CONTAINER \
+  -f /path/to/extracted/talos-azure.vhd \
+  -n talos-azure.vhd
+```
+
+#### Register the Image
+
+Now that the image is present in our blob storage, we'll register it.
+
+```bash
+az image create \
+  --name talos \
+  --source https://$STORAGE_ACCOUNT.blob.core.windows.net/$STORAGE_CONTAINER/talos-azure.vhd \
+  --os-type linux \
+  -g $GROUP
+```
+
+### Network Infrastructure
+
+#### Virtual Networks and Security Groups
+
+Once the image is prepared, we'll want to work through setting up the network.
+Issue the following to create a virtual network, a network security group, and the rules the security group needs.
+
+```bash
+# Create vnet
+az network vnet create \
+  --resource-group $GROUP \
+  --location $LOCATION \
+  --name talos-vnet \
+  --subnet-name talos-subnet
+
+# Create network security group
+az network nsg create -g $GROUP -n talos-sg
+
+# Client -> apid
+az network nsg rule create \
+  -g $GROUP \
+  --nsg-name talos-sg \
+  -n apid \
+  --priority 1001 \
+  --destination-port-ranges 50000 \
+  --direction inbound
+
+# Trustd
+az network nsg rule create \
+  -g $GROUP \
+  --nsg-name talos-sg \
+  -n trustd \
+  --priority 1002 \
+  --destination-port-ranges 50001 \
+  --direction inbound
+
+# etcd
+az network nsg rule create \
+  -g $GROUP \
+  --nsg-name talos-sg \
+  -n etcd \
+  --priority 1003 \
+  --destination-port-ranges 2379-2380 \
+  --direction inbound
+
+# Kubernetes API Server
+az network nsg rule create \
+  -g $GROUP \
+  --nsg-name talos-sg \
+  -n kube \
+  --priority 1004 \
+  --destination-port-ranges 6443 \
+  --direction inbound
+```
+
+#### Load Balancer
+
+We will create a public IP, load balancer, and a health check that we will use for our control plane.
+
+```bash
+# Create public ip
+az network public-ip create \
+  --resource-group $GROUP \
+  --name talos-public-ip \
+  --allocation-method static
+
+# Create lb
+az network lb create \
+  --resource-group $GROUP \
+  --name talos-lb \
+  --public-ip-address talos-public-ip \
+  --frontend-ip-name talos-fe \
+  --backend-pool-name talos-be-pool
+
+# Create health check
+az network lb probe create \
+  --resource-group $GROUP \
+  --lb-name talos-lb \
+  --name talos-lb-health \
+  --protocol tcp \
+  --port 6443
+
+# Create lb rule for 6443
+az network lb rule create \
+  --resource-group $GROUP \
+  --lb-name talos-lb \
+  --name talos-6443 \
+  --protocol tcp \
+  --frontend-ip-name talos-fe \
+  --frontend-port 6443 \
+  --backend-pool-name talos-be-pool \
+  --backend-port 6443 \
+  --probe-name talos-lb-health
+```
+
+#### Network Interfaces
+
+In Azure, we have to pre-create the NICs for our control plane so that they can be associated with our load balancer.
+
+```bash
+for i in $( seq 0 1 2 ); do
+  # Create public IP for each nic
+  az network public-ip create \
+    --resource-group $GROUP \
+    --name talos-controlplane-public-ip-$i \
+    --allocation-method static
+
+  # Create nic
+  az network nic create \
+    --resource-group $GROUP \
+    --name talos-controlplane-nic-$i \
+    --vnet-name talos-vnet \
+    --subnet talos-subnet \
+    --network-security-group talos-sg \
+    --public-ip-address talos-controlplane-public-ip-$i \
+    --lb-name talos-lb \
+    --lb-address-pools talos-be-pool
+done
+```
+
+### Cluster Configuration
+
+With our networking bits set up, we'll fetch the IP for our load balancer and create our configuration files.
+
+```bash
+LB_PUBLIC_IP=$(az network public-ip show \
+              --resource-group $GROUP \
+              --name talos-public-ip \
+              --query [ipAddress] \
+              --output tsv)
+
+talosctl gen config talos-k8s-azure-tutorial https://${LB_PUBLIC_IP}:6443
+```
+
+### Compute Creation
+
+We are now ready to create our Azure nodes.
+
+```bash
+# Create availability set
+az vm availability-set create \
+  --name talos-controlplane-av-set \
+  -g $GROUP
+
+# Create controlplane 0
+az vm create \
+  --name talos-controlplane-0 \
+  --image talos \
+  --custom-data ./init.yaml \
+  -g $GROUP \
+  --admin-username talos \
+  --generate-ssh-keys \
+  --verbose \
+  --boot-diagnostics-storage $STORAGE_ACCOUNT \
+  --os-disk-size-gb 20 \
+  --nics talos-controlplane-nic-0 \
+  --availability-set talos-controlplane-av-set \
+  --no-wait
+
+# Create 2 more controlplane nodes
+for i in $( seq 1 2 ); do
+  az vm create \
+    --name talos-controlplane-$i \
+    --image talos \
+    --custom-data ./controlplane.yaml \
+    -g $GROUP \
+    --admin-username talos \
+    --generate-ssh-keys \
+    --verbose \
+    --boot-diagnostics-storage $STORAGE_ACCOUNT \
+    --os-disk-size-gb 20 \
+    --nics talos-controlplane-nic-$i \
+    --availability-set talos-controlplane-av-set \
+    --no-wait
+done
+
+# Create worker node
+az vm create \
+  --name talos-worker-0 \
+  --image talos \
+  --vnet-name talos-vnet \
+  --subnet talos-subnet \
+  --custom-data ./join.yaml \
+  -g $GROUP \
+  --admin-username talos \
+  --generate-ssh-keys \
+  --verbose \
+  --boot-diagnostics-storage $STORAGE_ACCOUNT \
+  --nsg talos-sg \
+  --os-disk-size-gb 20 \
+  --no-wait
+
+# NOTES:
+# `--admin-username` and `--generate-ssh-keys` are required by the az cli,
+# but are not actually used by Talos
+# `--os-disk-size-gb` is the backing disk for Kubernetes and any workload containers
+# `--boot-diagnostics-storage` is to enable console output which may be necessary
+# for troubleshooting
+```
+
+### Retrieve the `kubeconfig`
+
+You should now be able to interact with your cluster with `talosctl`.
+We will need to discover the public IP for our first control plane node first.
+
+```bash
+CONTROL_PLANE_0_IP=$(az network public-ip show \
+                    --resource-group $GROUP \
+                    --name talos-controlplane-public-ip-0 \
+                    --query [ipAddress] \
+                    --output tsv)
+talosctl --talosconfig ./talosconfig config endpoint $CONTROL_PLANE_0_IP
+talosctl --talosconfig ./talosconfig kubeconfig .
+kubectl --kubeconfig ./kubeconfig get nodes
+```
diff --git a/docs/website/content/v0.7/en/guides/cloud/digitalocean.md b/docs/website/content/v0.7/en/guides/cloud/digitalocean.md
new file mode 100644
index 000000000..afacf683b
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/cloud/digitalocean.md
@@ -0,0 +1,148 @@
+---
+title: 'Digital Ocean'
+---
+
+## Creating a Cluster via the CLI
+
+In this guide we will create an HA Kubernetes cluster with 1 worker node.
+We assume an existing [Space](https://www.digitalocean.com/docs/spaces/), and some familiarity with Digital Ocean.
+If you need more information on Digital Ocean specifics, please see the [official Digital Ocean documentation](https://www.digitalocean.com/docs/).
+
+### Create the Image
+
+First, download the Digital Ocean image from a Talos release.
+
+Using an upload method of your choice (`doctl` does not have Spaces support), upload the image to a space.
+Now, create an image using the URL of the uploaded image:
+
+```bash
+doctl compute image create \
+    --region $REGION \
+    --image-name talos-digital-ocean-tutorial \
+    --image-url https://talos-tutorial.$REGION.digitaloceanspaces.com/digital-ocean.raw.gz \
+    Talos
+```
+
+Save the image ID.
+We will need it when creating droplets.
+
+### Create a Load Balancer
+
+```bash
+doctl compute load-balancer create \
+    --region $REGION \
+    --name talos-digital-ocean-tutorial-lb \
+    --tag-name talos-digital-ocean-tutorial-control-plane \
+    --health-check protocol:tcp,port:6443,check_interval_seconds:10,response_timeout_seconds:5,healthy_threshold:5,unhealthy_threshold:3 \
+    --forwarding-rules entry_protocol:tcp,entry_port:443,target_protocol:tcp,target_port:6443
+```
+
+We will need the IP of the load balancer.
+Using the ID of the load balancer, run:
+
+```bash
+doctl compute load-balancer get --format IP <load balancer ID>
+```
+
+Save it, as we will need it in the next step.
+
+### Create the Machine Configuration Files
+
+#### Generating Base Configurations
+
+Using the IP address of the load balancer created earlier, generate the base configuration files for the Talos machines:
+
+```bash
+$ talosctl gen config talos-k8s-digital-ocean-tutorial https://<load balancer IP or DNS>:<port>
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+At this point, you can modify the generated configs to your liking.
+
+#### Validate the Configuration Files
+
+```bash
+$ talosctl validate --config init.yaml --mode cloud
+init.yaml is valid for cloud mode
+$ talosctl validate --config controlplane.yaml --mode cloud
+controlplane.yaml is valid for cloud mode
+$ talosctl validate --config join.yaml --mode cloud
+join.yaml is valid for cloud mode
+```
+
+### Create the Droplets
+
+#### Create the Bootstrap Node
+
+```bash
+doctl compute droplet create \
+    --region $REGION \
+    --image <image ID> \
+    --size s-2vcpu-4gb \
+    --enable-private-networking \
+    --tag-names talos-digital-ocean-tutorial-control-plane \
+    --user-data-file init.yaml \
+    --ssh-keys <ssh key fingerprint> \
+    talos-control-plane-1
+```
+
+> Note: Although SSH is not used by Talos, Digital Ocean still requires that an SSH key be associated with the droplet.
+> Create a dummy key that can be used to satisfy this requirement.
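+
+For example, a throwaway key can be generated and registered like this (the `dummy-key` name and file path are our own; any method of adding a public key to Digital Ocean works):
+
+```bash
+ssh-keygen -t ed25519 -f dummy-key -N ''
+doctl compute ssh-key import dummy-key --public-key-file dummy-key.pub
+doctl compute ssh-key list
+```
+
+The last command prints the key's fingerprint, which is the value to pass to `--ssh-keys` above.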
+
+#### Create the Remaining Control Plane Nodes
+
+Run the following twice, to give ourselves three total control plane nodes:
+
+```bash
+doctl compute droplet create \
+    --region $REGION \
+    --image <image ID> \
+    --size s-2vcpu-4gb \
+    --enable-private-networking \
+    --tag-names talos-digital-ocean-tutorial-control-plane \
+    --user-data-file controlplane.yaml \
+    --ssh-keys <ssh key fingerprint> \
+    talos-control-plane-2
+doctl compute droplet create \
+    --region $REGION \
+    --image <image ID> \
+    --size s-2vcpu-4gb \
+    --enable-private-networking \
+    --tag-names talos-digital-ocean-tutorial-control-plane \
+    --user-data-file controlplane.yaml \
+    --ssh-keys <ssh key fingerprint> \
+    talos-control-plane-3
+```
+
+#### Create the Worker Nodes
+
+Run the following to create a worker node:
+
+```bash
+doctl compute droplet create \
+    --region $REGION \
+    --image <image ID> \
+    --size s-2vcpu-4gb \
+    --enable-private-networking \
+    --user-data-file join.yaml \
+    --ssh-keys <ssh key fingerprint> \
+    talos-worker-1
+```
+
+### Retrieve the `kubeconfig`
+
+To configure `talosctl` we will need the first control plane node's IP:
+
+```bash
+doctl compute droplet get --format PublicIPv4 <droplet ID>
+```
+
+At this point we can retrieve the admin `kubeconfig` by running:
+
+```bash
+talosctl --talosconfig talosconfig config endpoint <control plane 1 IP>
+talosctl --talosconfig talosconfig kubeconfig .
+```
diff --git a/docs/website/content/v0.7/en/guides/cloud/gcp.md b/docs/website/content/v0.7/en/guides/cloud/gcp.md
new file mode 100644
index 000000000..f20fcb4ee
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/cloud/gcp.md
@@ -0,0 +1,167 @@
+---
+title: 'GCP'
+---
+
+## Creating a Cluster via the CLI
+
+In this guide, we will create an HA Kubernetes cluster in GCP with 1 worker node.
+We will assume an existing [Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets), and some familiarity with Google Cloud.
+If you need more information on Google Cloud specifics, please see the [official Google documentation](https://cloud.google.com/docs/).
+
+### Environment Setup
+
+We'll make use of the following environment variables throughout the setup.
+Edit the variables below with your correct information.
+
+```bash
+# Storage account to use
+export STORAGE_BUCKET="StorageBucketName"
+# Region
+export REGION="us-central1"
+```
+
+### Create the Image
+
+First, download the Google Cloud image from a Talos [release](https://github.com/talos-systems/talos/releases).
+These images are called `gcp.tar.gz`.
+
+#### Upload the Image
+
+Once you have downloaded the image, you can upload it to your storage bucket with:
+
+```bash
+gsutil cp /path/to/gcp.tar.gz gs://$STORAGE_BUCKET
+```
+
+#### Register the Image
+
+Now that the image is present in our bucket, we'll register it.
+
+```bash
+gcloud compute images create talos \
+ --source-uri=gs://$STORAGE_BUCKET/gcp.tar.gz \
+ --guest-os-features=VIRTIO_SCSI_MULTIQUEUE
+```
+
+### Network Infrastructure
+
+#### Load Balancers and Firewalls
+
+Once the image is prepared, we'll want to work through setting up the network.
+Issue the following to create a firewall, load balancer, and their required components.
+
+```bash
+# Create Instance Group
+gcloud compute instance-groups unmanaged create talos-ig \
+  --zone $REGION-b
+
+# Create port for IG
+gcloud compute instance-groups set-named-ports talos-ig \
+  --named-ports tcp6443:6443 \
+  --zone $REGION-b
+
+# Create health check
+gcloud compute health-checks create tcp talos-health-check --port 6443
+
+# Create backend
+gcloud compute backend-services create talos-be \
+  --global \
+  --protocol TCP \
+  --health-checks talos-health-check \
+  --timeout 5m \
+  --port-name tcp6443
+
+# Add instance group to backend
+gcloud compute backend-services add-backend talos-be \
+  --global \
+  --instance-group talos-ig \
+  --instance-group-zone $REGION-b
+
+# Create tcp proxy
+gcloud compute target-tcp-proxies create talos-tcp-proxy \
+  --backend-service talos-be \
+  --proxy-header NONE
+
+# Create LB IP
+gcloud compute addresses create talos-lb-ip --global
+
+# Forward 443 from LB IP to tcp proxy
+gcloud compute forwarding-rules create talos-fwd-rule \
+  --global \
+  --ports 443 \
+  --address talos-lb-ip \
+  --target-tcp-proxy talos-tcp-proxy
+
+# Create firewall rule for health checks
+gcloud compute firewall-rules create talos-controlplane-firewall \
+  --source-ranges 130.211.0.0/22,35.191.0.0/16 \
+  --target-tags talos-controlplane \
+  --allow tcp:6443
+```
+
+### Cluster Configuration
+
+With our networking bits set up, we'll fetch the IP for our load balancer and create our configuration files.
+
+```bash
+LB_PUBLIC_IP=$(gcloud compute forwarding-rules describe talos-fwd-rule \
+               --global \
+               --format json \
+               | jq -r .IPAddress)
+
+talosctl gen config talos-k8s-gcp-tutorial https://${LB_PUBLIC_IP}:443
+```
+
+### Compute Creation
+
+We are now ready to create our GCP nodes.
+
+```bash
+# Create control plane 0
+gcloud compute instances create talos-controlplane-0 \
+  --image talos \
+  --zone $REGION-b \
+  --tags talos-controlplane \
+  --boot-disk-size 20GB \
+  --metadata-from-file=user-data=./init.yaml
+
+# Create control plane 1/2
+for i in $( seq 1 2 ); do
+  gcloud compute instances create talos-controlplane-$i \
+    --image talos \
+    --zone $REGION-b \
+    --tags talos-controlplane \
+    --boot-disk-size 20GB \
+    --metadata-from-file=user-data=./controlplane.yaml
+done
+
+# Add control plane nodes to instance group
+for i in $( seq 0 1 2 ); do
+  gcloud compute instance-groups unmanaged add-instances talos-ig \
+    --zone $REGION-b \
+    --instances talos-controlplane-$i
+done
+
+# Create worker
+gcloud compute instances create talos-worker-0 \
+  --image talos \
+  --zone $REGION-b \
+  --boot-disk-size 20GB \
+  --metadata-from-file=user-data=./join.yaml
+```
+
+### Retrieve the `kubeconfig`
+
+You should now be able to interact with your cluster with `talosctl`.
+We will need to discover the public IP for our first control plane node first.
+
+```bash
+CONTROL_PLANE_0_IP=$(gcloud compute instances describe talos-controlplane-0 \
+                     --zone $REGION-b \
+                     --format json \
+                     | jq -r '.networkInterfaces[0].accessConfigs[0].natIP')
+
+talosctl --talosconfig ./talosconfig config endpoint $CONTROL_PLANE_0_IP
+talosctl --talosconfig ./talosconfig kubeconfig .
+kubectl --kubeconfig ./kubeconfig get nodes
+```
diff --git a/docs/website/content/v0.7/en/guides/cloud/vmware.md b/docs/website/content/v0.7/en/guides/cloud/vmware.md
new file mode 100644
index 000000000..314f47890
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/cloud/vmware.md
@@ -0,0 +1,215 @@
+---
+title: 'VMware'
+---
+
+## Creating a Cluster via the `govc` CLI
+
+In this guide we will create an HA Kubernetes cluster with 2 worker nodes.
+We will use the `govc` CLI, which can be downloaded [here](https://github.com/vmware/govmomi/tree/master/govc#installation).
+
+### Prerequisites
+
+Prior to starting, it is important to have the following infrastructure in place and available:
+
+- DHCP server
+- Load Balancer or DNS address for cluster endpoint
+  - If using a load balancer, the most common setup is to balance `tcp/443` across the control plane nodes' `tcp/6443`
+  - If using a DNS address, the A record should return back the addresses of the control plane nodes
+
+### Create the Machine Configuration Files
+
+#### Generating Base Configurations
+
+Using the IP address or DNS name of the load balancer used in the prereq steps, generate the base configuration files for the Talos machines:
+
+```bash
+$ talosctl gen config talos-k8s-vmware-tutorial https://<load balancer IP or DNS>:<port>
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+For example, if the cluster endpoint is a DNS name pointing at the control plane nodes on port 6443:
+
+```bash
+$ talosctl gen config talos-k8s-vmware-tutorial https://<DNS name>:6443
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+At this point, you can modify the generated configs to your liking.
+
+#### Validate the Configuration Files
+
+```bash
+$ talosctl validate --config init.yaml --mode cloud
+init.yaml is valid for cloud mode
+$ talosctl validate --config controlplane.yaml --mode cloud
+controlplane.yaml is valid for cloud mode
+$ talosctl validate --config join.yaml --mode cloud
+join.yaml is valid for cloud mode
+```
+
+### Set Environment Variables
+
+`govc` makes use of the following environment variables:
+
+```bash
+export GOVC_URL=<vCenter URL>
+export GOVC_USERNAME=<vCenter username>
+export GOVC_PASSWORD=<vCenter password>
+```
+
+> Note: If your vCenter installation makes use of self-signed certificates, you'll want to export `GOVC_INSECURE=true`.
+
+There are some additional variables that you may need to set:
+
+```bash
+export GOVC_DATACENTER=<datacenter>
+export GOVC_RESOURCE_POOL=<resource pool>
+export GOVC_DATASTORE=<datastore>
+export GOVC_NETWORK=<network>
+```
+
+### Download the OVA
+
+A `talos.ova` asset is published with each [release](https://github.com/talos-systems/talos/releases).
+We will refer to the version of the release as `$TALOS_VERSION` below.
+It can be easily exported with `export TALOS_VERSION="v0.3.0-alpha.10"` or similar.
+
+```bash
+curl -LO https://github.com/talos-systems/talos/releases/download/$TALOS_VERSION/talos.ova
+```
+
+### Import the OVA into vCenter
+
+We'll need to repeat this step for each Talos node we want to create.
+In a typical HA setup, we'll have 3 control plane nodes and N workers.
+In the following example, we'll set up an HA control plane with two worker nodes.
+
+```bash
+govc import.ova -name talos-$TALOS_VERSION /path/to/downloaded/talos.ova
+```
+
+#### Create the Bootstrap Node
+
+We'll clone the OVA to create the bootstrap node (our first control plane node).
+
+```bash
+govc vm.clone -on=false -vm talos-$TALOS_VERSION control-plane-1
+```
+
+Talos makes use of the `guestinfo` facility of VMware to provide the machine/cluster configuration.
+This can be set using the `govc vm.change` command.
+To facilitate persistent storage using the vSphere cloud provider integration with Kubernetes, `disk.enableUUID=1` is used.
+
+```bash
+govc vm.change \
+  -e "guestinfo.talos.config=$(cat init.yaml | base64)" \
+  -e "disk.enableUUID=1" \
+  -vm /ha-datacenter/vm/control-plane-1
+```
+
+#### Update Hardware Resources for the Bootstrap Node
+
+- `-c` is used to configure the number of cpus
+- `-m` is used to configure the amount of memory (in MB)
+
+```bash
+govc vm.change \
+  -c 2 \
+  -m 4096 \
+  -vm /ha-datacenter/vm/control-plane-1
+```
+
+The following can be used to adjust the ephemeral disk size.
+
+```bash
+govc vm.disk.change -vm control-plane-1 -disk.name disk-1000-0 -size 10G
+```
+
+Once the hardware is configured, power the VM on:
+
+```bash
+govc vm.power -on control-plane-1
+```
+
+#### Create the Remaining Control Plane Nodes
+
+```bash
+govc vm.clone -on=false -vm talos-$TALOS_VERSION control-plane-2
+govc vm.change \
+  -e "guestinfo.talos.config=$(base64 controlplane.yaml)" \
+  -e "disk.enableUUID=1" \
+  -vm /ha-datacenter/vm/control-plane-2
+govc vm.clone -on=false -vm talos-$TALOS_VERSION control-plane-3
+govc vm.change \
+  -e "guestinfo.talos.config=$(base64 controlplane.yaml)" \
+  -e "disk.enableUUID=1" \
+  -vm /ha-datacenter/vm/control-plane-3
+```
+
+```bash
+govc vm.change \
+  -c 2 \
+  -m 4096 \
+  -vm /ha-datacenter/vm/control-plane-2
+govc vm.change \
+  -c 2 \
+  -m 4096 \
+  -vm /ha-datacenter/vm/control-plane-3
+```
+
+```bash
+govc vm.disk.change -vm control-plane-2 -disk.name disk-1000-0 -size 10G
+govc vm.disk.change -vm control-plane-3 -disk.name disk-1000-0 -size 10G
+```
+
+```bash
+govc vm.power -on control-plane-2
+govc vm.power -on control-plane-3
+```
+
+#### Update Settings for the Worker Nodes
+
+```bash
+govc vm.clone -on=false -vm talos-$TALOS_VERSION worker-1
+govc vm.change \
+  -e "guestinfo.talos.config=$(base64 join.yaml)" \
+  -e "disk.enableUUID=1" \
+  -vm /ha-datacenter/vm/worker-1
+govc vm.clone -on=false -vm talos-$TALOS_VERSION worker-2
+govc vm.change \
+  -e "guestinfo.talos.config=$(base64 join.yaml)" \
+  -e "disk.enableUUID=1" \
+  -vm /ha-datacenter/vm/worker-2
+```
+
+```bash
+govc vm.change \
+  -c 4 \
+  -m 8192 \
+  -vm /ha-datacenter/vm/worker-1
+govc vm.change \
+  -c 4 \
+  -m 8192 \
+  -vm /ha-datacenter/vm/worker-2
+```
+
+```bash
+govc vm.disk.change -vm worker-1 -disk.name disk-1000-0 -size 50G
+govc vm.disk.change -vm worker-2 -disk.name disk-1000-0 -size 50G
+```
+
+```bash
+govc vm.power -on worker-1
+govc vm.power -on worker-2
+```
+
+### Retrieve the `kubeconfig`
+
+At this point we can retrieve the admin `kubeconfig` by running:
+
+```bash
+talosctl --talosconfig talosconfig config endpoint <control plane 1 IP>
+talosctl --talosconfig talosconfig kubeconfig .
+```
diff --git a/docs/website/content/v0.7/en/guides/getting-started/help.md b/docs/website/content/v0.7/en/guides/getting-started/help.md
new file mode 100644
index 000000000..eb005aed3
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/getting-started/help.md
@@ -0,0 +1,20 @@
+---
+title: How to Get Help
+---
+
+There are a few ways to get help with your Talos project.
+
+### Open Source Community
+
+We have an active and helpful open source community who would be happy to give you assistance.
+The best way to get a hold of us would be to join our [Slack channel](https://slack.dev.talos-systems.io/).
+If you think you have found a bug, or would like to file a feature request, please use our [GitHub issue tracker](https://github.com/talos-systems/talos/issues).
+We also hold weekly office hours on Zoom.
+[Join the Google Group](https://groups.google.com/a/talos-systems.com/forum/#!forum/community) to receive calendar invitations to the scheduled meetings.
+
+### Commercial Support and Consulting
+
+If you are using Talos in a production setting, and need consulting services to get started or to integrate Talos into your existing environment, we can help.
+Talos Systems, Inc. also offers support contracts with SLA (Service Level Agreement)-bound terms for mission-critical environments.
+
+[Learn More](https://www.talos-systems.com/subscription/)
diff --git a/docs/website/content/v0.7/en/guides/getting-started/intro.md b/docs/website/content/v0.7/en/guides/getting-started/intro.md
new file mode 100644
index 000000000..c55d92e0a
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/getting-started/intro.md
@@ -0,0 +1,75 @@
+---
+title: Introduction to Talos
+---
+
+Welcome to the Talos documentation!
+Talos is an open source platform to host and maintain Kubernetes clusters.
+It includes a purpose-built operating system and associated management tools.
+It can run on all major cloud providers, virtualization platforms, and bare metal hardware.
+
+All system management is done via an API, and there is no shell or interactive console.
+Some of the capabilities and benefits provided by Talos include:
+
+- **Security**: Talos reduces your attack surface by practicing the Principle of Least Privilege (PoLP) and by securing the API with mutual TLS (mTLS) authentication.
+- **Predictability**: Talos eliminates unneeded variables and reduces unknown factors in your environment by employing immutable infrastructure ideology.
+- **Evolvability**: Talos simplifies your architecture and increases your ability to easily accommodate future changes.
+
+Talos is flexible and can be deployed in a variety of ways, but the easiest way to get started and experiment with the system is to run a local cluster on your laptop or workstation.
+There are two options:
+
+- [Run a Docker-based local cluster](/docs/v0.7/en/guides/local/docker) on your Linux or Mac workstation
+- [Run a Firecracker micro-VM-based](/docs/v0.7/en/guides/local/firecracker) cluster on your Linux workstation
+
+### System requirements
+
+Talos itself is a low-resource OS, but since the goal of Talos is to run Kubernetes, you need at least the following hardware:
+
+#### Minimum requirements
+
+| Role               | Memory | Cores |
+| ------------------ | ------ | ----- |
+| Init/Control Plane | 2GB    | 2     |
+| Worker             | 1GB    | 1     |
+
+#### Recommended
+
+| Role               | Memory | Cores |
+| ------------------ | ------ | ----- |
+| Init/Control Plane | 4GB    | 4     |
+| Worker             | 2GB    | 2     |
+
+These requirements are similar to those of Kubernetes itself.
diff --git a/docs/website/content/v0.7/en/guides/getting-started/talosctl.md b/docs/website/content/v0.7/en/guides/getting-started/talosctl.md
new file mode 100644
index 000000000..72c1881bb
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/getting-started/talosctl.md
@@ -0,0 +1,190 @@
+---
+title: The talosctl
+---
+
+One of the important components of Talos is the CLI (Command Line Interface), which lets you interact with the OS running on your system.
+This guide gives you some hands-on examples, and some more context for working with `talosctl`.
+
+### Getting Started
+
+To get going with `talosctl`, you need to download the latest release from GitHub [here](https://github.com/talos-systems/talos/releases).
+
+```bash
+curl -Lo /usr/local/bin/talosctl https://github.com/talos-systems/talos/releases/latest/download/talosctl-$(uname -s | tr "[:upper:]" "[:lower:]")-amd64
+chmod +x /usr/local/bin/talosctl
+```
+
+Now, test if it's working by running:
+
+```bash
+talosctl --help
+```
+
+### Commands
+
+#### Configuration of talosctl
+
+The `talosctl` command needs some configuration options to connect to the right node.
+By default `talosctl` looks for a configuration file called `config` located at `$HOME/.talos`.
+
+If you created the configuration file using one of the guides, you'll have a file named `talosconfig`, which you can place inside the `.talos` directory as `config` so that `talosctl` automatically uses that configuration.
+
+You can always override which configuration `talosctl` uses by specifying the `--talosconfig` parameter:
+
+```bash
+talosctl --talosconfig other_talosconfig
+```
+
+#### Connecting to a Node
+
+> You need a working `talosconfig` before you can connect to a control-plane node; to get a `talosconfig`, please follow one of the guides specific to your needs.
+> We're assuming you have set up `talosctl` with a default `~/.talos/config`.
+
+Connect to a controlplane:
+
+```bash
+talosctl config endpoint <control plane node address>
+```
+
+You can then switch to another node:
+
+```bash
+talosctl config nodes <node address>
+```
+
+> Pro tip!
+> You can connect to multiple nodes at once, by separating them with a space like this:
+>
+> ```bash
+> talosctl config nodes node1.example.org node2.example.org
+> ```
+>
+> You can use hostnames or IPs here as well, or mix and match.
+
+To verify which node you're currently connected to, you can run `talosctl version`, which will output something like this:
+
+```bash
+$ talosctl version
+Client:
+    Tag:         v0.4.1
+    SHA:         a1234bc5
+    Built:
+    Go version:  go1.14.2
+    OS/Arch:     linux/amd64
+
+Server:
+    NODE:        192.168.2.44
+    Tag:         v0.4.1
+    SHA:         a1234bc5
+    Built:
+    Go version:  go1.14.2
+    OS/Arch:     linux/amd64
+```
+
+### Getting Information From a Node
+
+#### Services
+
+Making sure all the services are running on a node is crucial for the operation of your Kubernetes cluster.
+To identify all running services on a Talos node, you can run the `services` command.
+
+```bash
+$ talosctl services
+NODE           SERVICE             STATE      HEALTH   LAST CHANGE     LAST EVENT
+192.168.2.44   apid                Running    OK       192h7m40s ago   Health check successful
+192.168.2.44   bootkube            Finished   ?        192h5m1s ago    Service finished successfully
+192.168.2.44   containerd          Running    OK       192h7m47s ago   Health check successful
+192.168.2.44   etcd                Running    OK       192h6m56s ago   Health check successful
+192.168.2.44   kubelet             Running    OK       192h5m47s ago   Health check successful
+192.168.2.44   machined-api        Running    ?        192h7m48s ago   Service started as goroutine
+192.168.2.44   networkd            Running    OK       192h7m11s ago   Health check successful
+192.168.2.44   ntpd                Running    ?        192h7m10s ago   Started task ntpd (PID 4144) for container ntpd
+192.168.2.44   routerd             Running    OK       192h7m46s ago   Started task routerd (PID 3907) for container routerd
+192.168.2.44   system-containerd   Running    OK       192h7m48s ago   Health check successful
+192.168.2.44   trustd              Running    OK       192h7m45s ago   Health check successful
+192.168.2.44   udevd               Running    ?        192h7m47s ago   Process Process(["/sbin/udevd" "--resolve-names=never" "-D"]) started with PID 2893
+192.168.2.44   udevd-trigger       Finished   ?        192h7m47s ago   Service finished successfully
+```
+
+> Note: the above command was run on a controlplane node; a worker node runs a different set of services.
+
+#### Containers
+
+Sometimes it's necessary to check for certain containers on Talos itself.
+This can be achieved by the `containers` subcommand:
+
+```bash
+$ talosctl containers
+NODE           NAMESPACE   ID         IMAGE            PID    STATUS
+192.168.2.44   system      apid       talos/apid       4021   RUNNING
+192.168.2.44   system      networkd   talos/networkd   3893   RUNNING
+192.168.2.44   system      ntpd       talos/ntpd       4144   RUNNING
+192.168.2.44   system      routerd    talos/routerd    3907   RUNNING
+192.168.2.44   system      trustd     talos/trustd     4010   RUNNING
+```
+
+> For the keyboard warriors: `talosctl c` works as well, and saves you 9 characters.
+
+To verify the containers running on the host that live in the Kubernetes (`k8s.io`) namespace:
+
+```bash
+$ talosctl containers -k
+NODE           NAMESPACE   ID                                                                     IMAGE                                                                                       PID    STATUS
+192.168.2.44   k8s.io      kube-system/coredns-669d45d65b-st7sl                                   k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   6632   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/coredns-669d45d65b-st7sl:coredns                        k8s.gcr.io/coredns@sha256:7ec975f167d815311a7136c32e70735f0d00b73781365df1befd46ed35bd4fe7 6719   RUNNING
+192.168.2.44   k8s.io      kube-system/coredns-669d45d65b-zt586                                   k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   6587   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/coredns-669d45d65b-zt586:coredns                        k8s.gcr.io/coredns@sha256:7ec975f167d815311a7136c32e70735f0d00b73781365df1befd46ed35bd4fe7 6712   RUNNING
+192.168.2.44   k8s.io      kube-system/kube-apiserver-6lrdp                                       k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   5511   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-apiserver-6lrdp:kube-apiserver                     k8s.gcr.io/hyperkube:v1.18.0                                                                6167   RUNNING
+192.168.2.44   k8s.io      kube-system/kube-controller-manager-p6zpr                              k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   5807   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-controller-manager-p6zpr:kube-controller-manager   k8s.gcr.io/hyperkube:v1.18.0                                                                5844   RUNNING
+192.168.2.44   k8s.io      kube-system/kube-flannel-xr89l                                         k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   5152   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-flannel-xr89l:install-cni                          quay.io/coreos/flannel-cni:v0.3.0                                                           5332   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-flannel-xr89l:kube-flannel                         quay.io/coreos/flannel:v0.11.0-amd64                                                        5197   RUNNING
+192.168.2.44   k8s.io      kube-system/kube-proxy-9bh74                                           k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   4999   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-proxy-9bh74:kube-proxy                             k8s.gcr.io/hyperkube:v1.18.0                                                                5031   RUNNING
+192.168.2.44   k8s.io      kube-system/kube-scheduler-k87t8                                       k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   5714   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/kube-scheduler-k87t8:kube-scheduler                     k8s.gcr.io/hyperkube:v1.18.0                                                                5745   RUNNING
+192.168.2.44   k8s.io      kube-system/pod-checkpointer-c5hk6                                     k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   5512   RUNNING
+192.168.2.44   k8s.io      kube-system/pod-checkpointer-c5hk6-talos-10-32-2-197                   k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   6341   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/pod-checkpointer-c5hk6-talos-10-32-2-197:pod-checkpointer   docker.io/autonomy/pod-checkpointer@sha256:476277082931570df3c863ad37ab11f0ad7050710caf02ba46d053837fe6e366   6374   RUNNING
+192.168.2.44   k8s.io      └─ kube-system/pod-checkpointer-c5hk6:pod-checkpointer                 docker.io/autonomy/pod-checkpointer@sha256:476277082931570df3c863ad37ab11f0ad7050710caf02ba46d053837fe6e366   5927   RUNNING
+192.168.2.44   k8s.io      kubelet                                                                k8s.gcr.io/hyperkube:v1.18.0                                                                4885    RUNNING
+192.168.2.44   k8s.io      metallb-system/speaker-2rbf7                                           k8s.gcr.io/pause@sha256:f78411e19d84a252e53bff71a4407a5686c46983a2c2eeed83929b888179acea   84985   RUNNING
+192.168.2.44   k8s.io      └─ metallb-system/speaker-2rbf7:speaker                                docker.io/metallb/speaker@sha256:2b74eca0f25e946e9a1dc4b94b9da067b1fec4244364d266283dfbbab546a629   85033   RUNNING
+```
+
+#### Logs
+
+Retrieving logs is also done through `talosctl`.
+Using the previous commands to look up containers, we can use the _ID_ to get the logs from a specific container.
+
+```bash
+$ talosctl logs apid
+192.168.2.44: 2020/05/19 14:14:24.715975 provider.go:109: next renewal in 11h59m59.642046025s
+192.168.2.44: 2020/05/19 14:14:34.684449 log.go:98: OK [/machine.MachineService/ServiceList] 5.355187ms stream Success (:authority=192.168.2.44:50000;content-type=application/grpc;user-agent=grpc-go/1.26.0)
+192.168.2.44: 2020/05/19 14:16:04.379499 log.go:98: OK [/machine.MachineService/ServiceList] 2.60977ms stream Success (:authority=192.168.2.44:50000;content-type=application/grpc;user-agent=grpc-go/1.26.0)
+192.168.2.44: 2020/05/19 14:17:50.498066 log.go:98: OK [/machine.MachineService/ServiceList] 2.489054ms stream Success (:authority=192.168.2.44:50000;content-type=application/grpc;user-agent=grpc-go/1.26.0)
+.....
+```
+
+> To get Kubernetes logs, you need to specify the `-k` parameter and the _ID_.
+
+#### Copy Files
+
+Sometimes you just need to copy over some files from the host machine and troubleshoot on your local machine.
+This can be done through the `copy` command.
+
+```bash
+talosctl copy /var/log/pods/ ./pods
+```
+
+> You can also use `cp` instead of `copy`.
+
+This will copy all logs located in `/var/log/pods/` to your local machine in the directory `pods`.
+
+### Next Steps
+
+To get all options available, please have a look at the [Git repo](https://github.com/talos-systems/talos/blob/master/docs/talosctl/talosctl.md).
diff --git a/docs/website/content/v0.7/en/guides/local/docker.md b/docs/website/content/v0.7/en/guides/local/docker.md
new file mode 100644
index 000000000..32773b1fd
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/local/docker.md
@@ -0,0 +1,48 @@
+---
+title: Docker
+---
+
+In this guide we will create a Kubernetes cluster in Docker, using a containerized version of Talos.
+
+Running Talos in Docker is intended for use in CI pipelines and for local testing when you need a quick and easy cluster.
+Furthermore, if you are running Talos in production, it provides an excellent way for developers to develop against the same version of Talos.
+
+## Requirements
+
+The following are requirements for running Talos in Docker:
+
+- Docker 18.03 or greater
+- a recent version of [`talosctl`](https://github.com/talos-systems/talos/releases)
+
+## Create the Cluster
+
+Creating a local cluster is as simple as:
+
+```bash
+talosctl cluster create --wait
+```
+
+Once the above finishes successfully, your talosconfig (`~/.talos/config`) will be configured to point to the new cluster.
+
+> Note: Startup times can take up to a minute before the cluster is available.
+
+## Retrieve and Configure the `kubeconfig`
+
+```bash
+talosctl kubeconfig .
+kubectl --kubeconfig kubeconfig config set-cluster talos_default --server https://127.0.0.1:6443
+```
+
+## Using the Cluster
+
+Once the cluster is available, you can make use of `talosctl` and `kubectl` to interact with the cluster.
+For example, to view current running containers, run `talosctl containers` for a list of containers in the `system` namespace, or `talosctl containers -k` for the `k8s.io` namespace.
+To view the logs of a container, use `talosctl logs <container>` or `talosctl logs -k <container>`.
+
+## Cleaning Up
+
+To cleanup, run:
+
+```bash
+talosctl cluster destroy
+```
diff --git a/docs/website/content/v0.7/en/guides/local/firecracker.md b/docs/website/content/v0.7/en/guides/local/firecracker.md
new file mode 100644
index 000000000..4207a67d7
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/local/firecracker.md
@@ -0,0 +1,313 @@
+---
+title: Firecracker
+---
+
+In this guide we will create a Kubernetes cluster using Firecracker.
+
+## Requirements
+
+- Linux
+- a kernel with
+  - KVM enabled (`/dev/kvm` must exist)
+  - `CONFIG_NET_SCH_NETEM` enabled
+  - `CONFIG_NET_SCH_INGRESS` enabled
+- at least `CAP_SYS_ADMIN` and `CAP_NET_ADMIN` capabilities
+- [firecracker](https://github.com/firecracker-microvm/firecracker/releases) (v0.21.0 or higher)
+- `bridge`, `static` and `firewall` CNI plugins from the [standard CNI plugins](https://github.com/containernetworking/cni), and `tc-redirect-tap` CNI plugin from the [awslabs tc-redirect-tap](https://github.com/awslabs/tc-redirect-tap) installed to `/opt/cni/bin`
+- iptables
+- `/etc/cni/conf.d` directory should exist
+- `/var/run/netns` directory should exist
+
+## Installation
+
+### How to get firecracker (v0.21.0 or higher)
+
+You can download the `firecracker` binary via
+[github.com/firecracker-microvm/firecracker/releases](https://github.com/firecracker-microvm/firecracker/releases)
+
+```bash
+curl https://github.com/firecracker-microvm/firecracker/releases/download/<version>/firecracker-<version>-<arch> -L -o firecracker
+```
+
+For example version `v0.21.1` for `linux` platform:
+
+```bash
+curl https://github.com/firecracker-microvm/firecracker/releases/download/v0.21.1/firecracker-v0.21.1-x86_64 -L -o firecracker
+sudo cp firecracker /usr/local/bin
+sudo chmod +x /usr/local/bin/firecracker
+```
+
+### Install talosctl
+
+You can download `talosctl` and all required binaries via
+[github.com/talos-systems/talos/releases](https://github.com/talos-systems/talos/releases)
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/<version>/talosctl-<platform>-amd64 -L -o talosctl
+```
+
+For example version `v0.7.0` for `linux` platform:
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/talosctl-linux-amd64 -L -o talosctl
+sudo cp talosctl /usr/local/bin
+sudo chmod +x /usr/local/bin/talosctl
+```
+
+### Install bridge, firewall and static required CNI plugins
+
+You can download the standard CNI required plugins via
+[github.com/containernetworking/plugins/releases](https://github.com/containernetworking/plugins/releases)
+
+```bash
+curl https://github.com/containernetworking/plugins/releases/download/<version>/cni-plugins-<platform>-<arch>-<version>.tgz -L -o cni-plugins-<platform>-<arch>-<version>.tgz
+```
+
+For example version `v0.8.5` for `linux` platform:
+
+```bash
+curl https://github.com/containernetworking/plugins/releases/download/v0.8.5/cni-plugins-linux-amd64-v0.8.5.tgz -L -o cni-plugins-linux-amd64-v0.8.5.tgz
+mkdir cni-plugins-linux
+tar -xf cni-plugins-linux-amd64-v0.8.5.tgz -C cni-plugins-linux
+sudo mkdir -p /opt/cni/bin
+sudo cp cni-plugins-linux/{bridge,firewall,static} /opt/cni/bin
+```
+
+### Install tc-redirect-tap CNI plugin
+
+You should install the `tc-redirect-tap` CNI plugin from the [github.com/awslabs/tc-redirect-tap](https://github.com/awslabs/tc-redirect-tap) repository:
+
+```bash
+go get -d github.com/awslabs/tc-redirect-tap/cmd/tc-redirect-tap
+cd $GOPATH/src/github.com/awslabs/tc-redirect-tap
+make all
+sudo cp tc-redirect-tap /opt/cni/bin
+```
+
+> Note: if `$GOPATH` is not set, it defaults to `~/go`.
+
+## Install Talos kernel and initramfs
+
+The Firecracker provisioner depends on the uncompressed Talos kernel (`vmlinuz`) and initramfs (`initramfs.xz`).
+These files can be downloaded from the Talos release:
+
+```bash
+mkdir -p _out/
+curl https://github.com/talos-systems/talos/releases/download/<version>/vmlinuz -L -o _out/vmlinuz
+curl https://github.com/talos-systems/talos/releases/download/<version>/initramfs.xz -L -o _out/initramfs.xz
+```
+
+For example version `v0.7.0`:
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/vmlinuz -L -o _out/vmlinuz
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/initramfs.xz -L -o _out/initramfs.xz
+```
+
+## Create the Cluster
+
+```bash
+sudo talosctl cluster create --provisioner firecracker
+```
+
+Once the above finishes successfully, your talosconfig (`~/.talos/config`) will be configured to point to the new cluster.
+
+## Retrieve and Configure the `kubeconfig`
+
+```bash
+talosctl kubeconfig .
+```
+
+## Using the Cluster
+
+Once the cluster is available, you can make use of `talosctl` and `kubectl` to interact with the cluster.
+For example, to view current running containers, run `talosctl containers` for a list of containers in the `system` namespace, or `talosctl containers -k` for the `k8s.io` namespace.
+To view the logs of a container, use `talosctl logs <container>` or `talosctl logs -k <container>`.
+
+A bridge interface will be created, and assigned the default IP 10.5.0.1.
+Each node will be directly accessible on the subnet specified at cluster creation time.
+A load balancer runs on 10.5.0.1 by default, handling load balancing for the Talos and Kubernetes APIs.
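+
+As a quick sanity check (our own example, assuming the default `10.5.0.0/24` network described above), the first control plane node should answer API requests:
+
+```bash
+talosctl -n 10.5.0.2 services
+```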
+
+You can see a summary of the cluster state by running:
+
+```bash
+$ talosctl cluster show --provisioner firecracker
+PROVISIONER       firecracker
+NAME              talos-default
+NETWORK NAME      talos-default
+NETWORK CIDR      10.5.0.0/24
+NETWORK GATEWAY   10.5.0.1
+NETWORK MTU       1500
+
+NODES:
+
+NAME                     TYPE           IP         CPU    RAM      DISK
+talos-default-master-1   Init           10.5.0.2   1.00   1.6 GB   4.3 GB
+talos-default-master-2   ControlPlane   10.5.0.3   1.00   1.6 GB   4.3 GB
+talos-default-master-3   ControlPlane   10.5.0.4   1.00   1.6 GB   4.3 GB
+talos-default-worker-1   Join           10.5.0.5   1.00   1.6 GB   4.3 GB
+```
+
+## Cleaning Up
+
+To cleanup, run:
+
+```bash
+sudo talosctl cluster destroy --provisioner firecracker
+```
+
+> Note: In the case that the host machine is rebooted before destroying the cluster, you may need to manually remove `~/.talos/clusters/talos-default`.
+
+## Manual Clean Up
+
+The `talosctl cluster destroy` command depends heavily on the cluster's state directory, which contains all cluster-related information: the PIDs and network settings associated with the cluster nodes.
+
+If you have deleted the state folder by mistake, or would like to clean up the environment manually, here are the steps:
+
+### Stopping VMs
+
+Find the `firecracker --api-sock` processes:
+
+```bash
+ps -elf | grep '[f]irecracker --api-sock'
+```
+
+To stop the VMs manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where VMs are running with PIDs **158065** and **158216**:
+
+```bash
+ps -elf | grep '[f]irecracker --api-sock'
+4 S root      158065  157615 44  80   0 - 264152 -      07:54 ?        00:34:25 firecracker --api-sock /root/.talos/clusters/k8s/k8s-master-1.sock
+4 S root      158216  157617 18  80   0 - 264152 -      07:55 ?        00:14:47 firecracker --api-sock /root/.talos/clusters/k8s/k8s-worker-1.sock
+sudo kill -s SIGTERM 158065
+sudo kill -s SIGTERM 158216
+```
+
+### Remove VMs
+
+Find the `talosctl firecracker-launch` processes:
+
+```bash
+ps -elf | grep 'talosctl firecracker-launch'
+```
+
+To remove the VMs manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where VMs are running with PIDs **157615** and **157617**:
+
+```bash
+ps -elf | grep '[t]alosctl firecracker-launch'
+0 S root      157615    2835  0  80   0 - 184934 -      07:53 ?        00:00:00 talosctl firecracker-launch
+0 S root      157617    2835  0  80   0 - 185062 -      07:53 ?        00:00:00 talosctl firecracker-launch
+sudo kill -s SIGTERM 157615
+sudo kill -s SIGTERM 157617
+```
+
+### Remove load balancer
+
+Find the `talosctl loadbalancer-launch` process:
+
+```bash
+ps -elf | grep 'talosctl loadbalancer-launch'
+```
+
+To remove the LB manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where the loadbalancer is running with PID **157609**:
+
+```bash
+ps -elf | grep '[t]alosctl loadbalancer-launch'
+4 S root      157609    2835  0  80   0 - 184998 -      07:53 ?        00:00:07 talosctl loadbalancer-launch --loadbalancer-addr 10.5.0.1 --loadbalancer-upstreams 10.5.0.2
+sudo kill -s SIGTERM 157609
+```
+
+### Remove network
+
+This is the trickier part, especially if you have already deleted the state folder.
+If you have not, the bridge name is recorded in the `state.yaml` in the `/root/.talos/clusters/<cluster name>` directory.
+
+```bash
+sudo cat /root/.talos/clusters/<cluster name>/state.yaml | grep bridgename
+bridgename: talos
+```
+
+If you only had one cluster, then it will be the interface with the name `talos`:
+
+```bash
+46: talos: mtu 1500 qdisc noqueue state DOWN group default qlen 1000
+    link/ether a6:72:f4:0a:d3:9c brd ff:ff:ff:ff:ff:ff
+    inet 10.5.0.1/24 brd 10.5.0.255 scope global talos17c13299
+       valid_lft forever preferred_lft forever
+    inet6 fe80::a472:f4ff:fe0a:d39c/64 scope link
+       valid_lft forever preferred_lft forever
+```
+
+To remove this interface:
+
+```bash
+sudo ip link del talos
+```
+
+### Remove state directory
+
+To remove the state directory execute:
+
+```bash
+sudo rm -Rf /root/.talos/clusters/<cluster name>
+```
+
+## Troubleshooting
+
+### Logs
+
+Inspect the logs directory:
+
+```bash
+sudo cat /root/.talos/clusters/<cluster name>/*.log
+```
+
+Logs are saved under `<cluster name>-<node name>.log`
+
+For example, in the case of the **k8s** cluster name:
+
+```bash
+sudo ls -la /root/.talos/clusters/k8s | grep log
+-rw-r--r--. 1 root root 69415 Apr 26 20:58 k8s-master-1.log
+-rw-r--r--. 1 root root 68345 Apr 26 20:58 k8s-worker-1.log
+-rw-r--r--. 1 root root 24621 Apr 26 20:59 lb.log
+```
+
+Inspect the logs during the installation:
+
+```bash
+sudo su -
+tail -f /root/.talos/clusters/<cluster name>/*.log
+```
+
+## Post-installation
+
+After executing these steps, you should be able to use `kubectl`:
+
+```bash
+sudo talosctl kubeconfig .
+mv kubeconfig $HOME/.kube/config
+sudo chown $USER:$USER $HOME/.kube/config
+```
diff --git a/docs/website/content/v0.7/en/guides/local/qemu.md b/docs/website/content/v0.7/en/guides/local/qemu.md
new file mode 100644
index 000000000..8d6842e4e
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/local/qemu.md
@@ -0,0 +1,324 @@
+---
+title: QEMU
+---
+
+In this guide we will create a Kubernetes cluster using QEMU.
+
+## Requirements
+
+- Linux
+- a kernel with
+  - KVM enabled (`/dev/kvm` must exist)
+  - `CONFIG_NET_SCH_NETEM` enabled
+  - `CONFIG_NET_SCH_INGRESS` enabled
+- at least `CAP_SYS_ADMIN` and `CAP_NET_ADMIN` capabilities
+- QEMU
+- `bridge`, `static` and `firewall` CNI plugins from the [standard CNI plugins](https://github.com/containernetworking/cni), and `tc-redirect-tap` CNI plugin from the [awslabs tc-redirect-tap](https://github.com/awslabs/tc-redirect-tap) installed to `/opt/cni/bin`
+- iptables
+- `/etc/cni/conf.d` directory should exist
+- `/var/run/netns` directory should exist
+
+## Installation
+
+### How to get QEMU
+
+Install QEMU with your operating system package manager.
+For example, on Ubuntu for x86:
+
+```bash
+apt install qemu-system-x86 qemu-kvm
+```
+
+### Install talosctl
+
+You can download `talosctl` and all required binaries via
+[github.com/talos-systems/talos/releases](https://github.com/talos-systems/talos/releases)
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/<version>/talosctl-<platform>-amd64 -L -o talosctl
+```
+
+For example version `v0.7.0` for `linux` platform:
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/talosctl-linux-amd64 -L -o talosctl
+sudo cp talosctl /usr/local/bin
+sudo chmod +x /usr/local/bin/talosctl
+```
+
+### Install bridge, firewall and static required CNI plugins
+
+You can download the standard CNI required plugins via
+[github.com/containernetworking/plugins/releases](https://github.com/containernetworking/plugins/releases)
+
+```bash
+curl https://github.com/containernetworking/plugins/releases/download/<version>/cni-plugins-<platform>-<arch>-<version>.tgz -L -o cni-plugins-<platform>-<arch>-<version>.tgz
+```
+
+For example version `v0.8.5` for `linux` platform:
+
+```bash
+curl https://github.com/containernetworking/plugins/releases/download/v0.8.5/cni-plugins-linux-amd64-v0.8.5.tgz -L -o cni-plugins-linux-amd64-v0.8.5.tgz
+mkdir cni-plugins-linux
+tar -xf cni-plugins-linux-amd64-v0.8.5.tgz -C cni-plugins-linux
+sudo mkdir -p /opt/cni/bin
+sudo cp cni-plugins-linux/{bridge,firewall,static} /opt/cni/bin
+```
+
+### Install tc-redirect-tap CNI plugin
+
+You should install the `tc-redirect-tap` CNI plugin from the [github.com/awslabs/tc-redirect-tap](https://github.com/awslabs/tc-redirect-tap) repository:
+
+```bash
+go get -d github.com/awslabs/tc-redirect-tap/cmd/tc-redirect-tap
+cd $GOPATH/src/github.com/awslabs/tc-redirect-tap
+make all
+sudo cp tc-redirect-tap /opt/cni/bin
+```
+
+> Note: if `$GOPATH` is not set, it defaults to `~/go`.
+
+## Install Talos kernel and initramfs
+
+The QEMU provisioner depends on the Talos kernel (`vmlinuz`) and initramfs (`initramfs.xz`).
+These files can be downloaded from the Talos release:
+
+```bash
+mkdir -p _out/
+curl https://github.com/talos-systems/talos/releases/download/<version>/vmlinuz -L -o _out/vmlinuz
+curl https://github.com/talos-systems/talos/releases/download/<version>/initramfs.xz -L -o _out/initramfs.xz
+```
+
+For example version `v0.7.0`:
+
+```bash
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/vmlinuz -L -o _out/vmlinuz
+curl https://github.com/talos-systems/talos/releases/download/v0.7.0/initramfs.xz -L -o _out/initramfs.xz
+```
+
+## Create the Cluster
+
+For the first run, create the root state directory as your own user so that you can inspect the logs as a non-root user:
+
+```bash
+mkdir -p ~/.talos/clusters
+```
+
+Create the cluster:
+
+```bash
+sudo -E talosctl cluster create --provisioner qemu
+```
+
+Once the above finishes successfully, your talosconfig (`~/.talos/config`) will be configured to point to the new cluster.
+
+## Retrieve and Configure the `kubeconfig`
+
+```bash
+talosctl -n 10.5.0.2 kubeconfig .
+```
+
+## Using the Cluster
+
+Once the cluster is available, you can make use of `talosctl` and `kubectl` to interact with the cluster.
+For example, to view current running containers, run `talosctl containers` for a list of containers in the `system` namespace, or `talosctl containers -k` for the `k8s.io` namespace.
+To view the logs of a container, use `talosctl logs <container>` or `talosctl logs -k <container>`.
+
+A bridge interface will be created, and assigned the default IP 10.5.0.1.
+Each node will be directly accessible on the subnet specified at cluster creation time.
+A load balancer runs on 10.5.0.1 by default, handling load balancing for the Talos and Kubernetes APIs.
+
+You can see a summary of the cluster state by running:
+
+```bash
+$ talosctl cluster show --provisioner qemu
+PROVISIONER       qemu
+NAME              talos-default
+NETWORK NAME      talos-default
+NETWORK CIDR      10.5.0.0/24
+NETWORK GATEWAY   10.5.0.1
+NETWORK MTU       1500
+
+NODES:
+
+NAME                     TYPE           IP         CPU    RAM      DISK
+talos-default-master-1   Init           10.5.0.2   1.00   1.6 GB   4.3 GB
+talos-default-master-2   ControlPlane   10.5.0.3   1.00   1.6 GB   4.3 GB
+talos-default-master-3   ControlPlane   10.5.0.4   1.00   1.6 GB   4.3 GB
+talos-default-worker-1   Join           10.5.0.5   1.00   1.6 GB   4.3 GB
+```
+
+## Cleaning Up
+
+To cleanup, run:
+
+```bash
+sudo -E talosctl cluster destroy --provisioner qemu
+```
+
+> Note: In the case that the host machine is rebooted before destroying the cluster, you may need to manually remove `~/.talos/clusters/talos-default`.
+
+## Manual Clean Up
+
+The `talosctl cluster destroy` command depends heavily on the cluster's state directory, which contains all cluster-related information: the PIDs and network settings associated with the cluster nodes.
+
+If you have deleted the state folder by mistake, or would like to clean up the environment manually, here are the steps:
+
+### Remove VM Launchers
+
+Find the `talosctl qemu-launch` processes:
+
+```bash
+ps -elf | grep 'talosctl qemu-launch'
+```
+
+To remove the VMs manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where VMs are running with PIDs **157615** and **157617**:
+
+```bash
+ps -elf | grep '[t]alosctl qemu-launch'
+0 S root      157615    2835  0  80   0 - 184934 -      07:53 ?        00:00:00 talosctl qemu-launch
+0 S root      157617    2835  0  80   0 - 185062 -      07:53 ?        00:00:00 talosctl qemu-launch
+sudo kill -s SIGTERM 157615
+sudo kill -s SIGTERM 157617
+```
+
+### Stopping VMs
+
+Find the `qemu-system` processes:
+
+```bash
+ps -elf | grep 'qemu-system'
+```
+
+To stop the VMs manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output:
+
+```bash
+ps -elf | grep qemu-system
+2 S root     1061663 1061168 26  80   0 - 1786238 -     14:05 ?        01:53:56 qemu-system-x86_64 -m 2048 -drive format=raw,if=virtio,file=/home/username/.talos/clusters/talos-default/bootstrap-master.disk -smp cpus=2 -cpu max -nographic -netdev tap,id=net0,ifname=tap0,script=no,downscript=no -device virtio-net-pci,netdev=net0,mac=1e:86:c6:b4:7c:c4 -device virtio-rng-pci -no-reboot -boot order=cn,reboot-timeout=5000 -smbios type=1,uuid=7ec0a73c-826e-4eeb-afd1-39ff9f9160ca -machine q35,accel=kvm
+2 S root     1061663 1061170 67  80   0 - 621014 -      21:23 ?        00:00:07 qemu-system-x86_64 -m 2048 -drive format=raw,if=virtio,file=/home/username/.talos/clusters/talos-default/pxe-1.disk -smp cpus=2 -cpu max -nographic -netdev tap,id=net0,ifname=tap0,script=no,downscript=no -device virtio-net-pci,netdev=net0,mac=36:f3:2f:c3:9f:06 -device virtio-rng-pci -no-reboot -boot order=cn,reboot-timeout=5000 -smbios type=1,uuid=ce12a0d0-29c8-490f-b935-f6073ab916a6 -machine q35,accel=kvm
+sudo kill -s SIGTERM 1061663
+sudo kill -s SIGTERM 1061663
+```
+
+### Remove load balancer
+
+Find the `talosctl loadbalancer-launch` process:
+
+```bash
+ps -elf | grep 'talosctl loadbalancer-launch'
+```
+
+To remove the LB manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where the loadbalancer is running with PID **157609**:
+
+```bash
+ps -elf | grep '[t]alosctl loadbalancer-launch'
+4 S root      157609    2835  0  80   0 - 184998 -      07:53 ?        00:00:07 talosctl loadbalancer-launch --loadbalancer-addr 10.5.0.1 --loadbalancer-upstreams 10.5.0.2
+sudo kill -s SIGTERM 157609
+```
+
+### Remove DHCP server
+
+Find the `talosctl dhcpd-launch` process:
+
+```bash
+ps -elf | grep 'talosctl dhcpd-launch'
+```
+
+To remove the DHCP server manually, execute:
+
+```bash
+sudo kill -s SIGTERM <pid>
+```
+
+Example output, where the DHCP server is running with PID **157609**:
+
+```bash
+ps -elf | grep '[t]alosctl dhcpd-launch'
+4 S root      157609    2835  0  80   0 - 184998 -      07:53 ?        00:00:07 talosctl dhcpd-launch --state-path /home/username/.talos/clusters/talos-default --addr 10.5.0.1 --interface talosbd9c32bc
+sudo kill -s SIGTERM 157609
+```
+
+### Remove network
+
+This is the trickier part, especially if you have already deleted the state folder.
+If you have not, the bridge name is recorded in the `state.yaml` in the `~/.talos/clusters/<cluster name>` directory.
+
+```bash
+sudo cat ~/.talos/clusters/<cluster name>/state.yaml | grep bridgename
+bridgename: talos
+```
+
+If you only had one cluster, then it will be the interface with the name `talos`:
+
+```bash
+46: talos: mtu 1500 qdisc noqueue state DOWN group default qlen 1000
+    link/ether a6:72:f4:0a:d3:9c brd ff:ff:ff:ff:ff:ff
+    inet 10.5.0.1/24 brd 10.5.0.255 scope global talos17c13299
+       valid_lft forever preferred_lft forever
+    inet6 fe80::a472:f4ff:fe0a:d39c/64 scope link
+       valid_lft forever preferred_lft forever
+```
+
+To remove this interface:
+
+```bash
+sudo ip link del talos
+```
+
+### Remove state directory
+
+To remove the state directory execute:
+
+```bash
+sudo rm -Rf /home/$USER/.talos/clusters/<cluster name>
+```
+
+## Troubleshooting
+
+### Logs
+
+Inspect the logs directory:
+
+```bash
+sudo cat ~/.talos/clusters/<cluster name>/*.log
+```
+
+Logs are saved under `<cluster name>-<node name>.log`
+
+For example, in the case of the **k8s** cluster name:
+
+```bash
+ls -la ~/.talos/clusters/k8s | grep log
+-rw-r--r--. 1 root root 69415 Apr 26 20:58 k8s-master-1.log
+-rw-r--r--. 1 root root 68345 Apr 26 20:58 k8s-worker-1.log
+-rw-r--r--. 1 root root 24621 Apr 26 20:59 lb.log
+```
+
+Inspect the logs during the installation:
+
+```bash
+tail -f ~/.talos/clusters/<cluster name>/*.log
+```
diff --git a/docs/website/content/v0.7/en/guides/local/registry-cache.md b/docs/website/content/v0.7/en/guides/local/registry-cache.md
new file mode 100644
index 000000000..a89338c8e
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/local/registry-cache.md
@@ -0,0 +1,96 @@
+---
+title: Registry Cache
+---
+
+In this guide we will create a set of local caching Docker registry proxies to minimize local cluster startup time.
+
+When running Talos locally, pulling images from Docker registries might take a significant amount of time.
+We spin up local caching pass-through registries to cache images and configure a local Talos cluster to use those proxies.
+A similar approach might be used to run Talos in production in air-gapped environments.
+It can be also used to verify that all the images are available in local registries.
+
+## Requirements
+
+The following are requirements for creating the set of caching proxies:
+
+- Docker 18.03 or greater
+- Local cluster requirements for either [docker](docker) or [firecracker](firecracker).
+
+## Launch the Caching Docker Registry Proxies
+
+Talos pulls from `docker.io`, `k8s.gcr.io`, `gcr.io` and `quay.io` by default.
diff --git a/docs/website/content/v0.7/en/guides/local/registry-cache.md b/docs/website/content/v0.7/en/guides/local/registry-cache.md
new file mode 100644
index 000000000..a89338c8e
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/local/registry-cache.md
@@ -0,0 +1,96 @@
+---
+title: Registry Cache
+---
+
+In this guide we will create a set of local caching Docker registry proxies to minimize local cluster startup time.
+
+When running Talos locally, pulling images from Docker registries might take a significant amount of time.
+We spin up local caching pass-through registries to cache images and configure a local Talos cluster to use those proxies.
+A similar approach might be used to run Talos in production in air-gapped environments.
+It can also be used to verify that all the images are available in local registries.
+
+## Requirements
+
+The following are requirements for creating the set of caching proxies:
+
+- Docker 18.03 or greater
+- Local cluster requirements for either [docker](docker) or [firecracker](firecracker).
+
+## Launch the Caching Docker Registry Proxies
+
+Talos pulls from `docker.io`, `k8s.gcr.io`, `gcr.io`, and `quay.io` by default.
+If your configuration is different, you might need to modify the commands below:
+
+```bash
+docker run -d -p 5000:5000 \
+    -e REGISTRY_PROXY_REMOTEURL=https://registry-1.docker.io \
+    --restart always \
+    --name registry-docker.io registry:2
+
+docker run -d -p 5001:5000 \
+    -e REGISTRY_PROXY_REMOTEURL=https://k8s.gcr.io \
+    --restart always \
+    --name registry-k8s.gcr.io registry:2
+
+docker run -d -p 5002:5000 \
+    -e REGISTRY_PROXY_REMOTEURL=https://quay.io \
+    --restart always \
+    --name registry-quay.io registry:2.5
+
+docker run -d -p 5003:5000 \
+    -e REGISTRY_PROXY_REMOTEURL=https://gcr.io \
+    --restart always \
+    --name registry-gcr.io registry:2
+```
+
+> Note: The proxies are started as Docker containers, and they're automatically configured to start with the Docker daemon.
+> Please note that the `quay.io` proxy doesn't support the recent Docker image schema, so we run an older registry image (version 2.5).
+
+As a registry container can only handle a single upstream Docker registry, we launch a container per upstream, each on its own
+host port (5000, 5001, 5002, and 5003).
+
+## Using Caching Registries with `firecracker` Local Cluster
+
+With a [firecracker](firecracker) local cluster, a bridge interface is created on the host.
+As the registry containers expose their ports on the host, we can use the bridge IP to direct proxy requests.
+
+```bash
+sudo talosctl cluster create --provisioner firecracker \
+    --registry-mirror docker.io=http://10.5.0.1:5000 \
+    --registry-mirror k8s.gcr.io=http://10.5.0.1:5001 \
+    --registry-mirror quay.io=http://10.5.0.1:5002 \
+    --registry-mirror gcr.io=http://10.5.0.1:5003
+```
+
+The Talos local cluster should now start pulling via the caching registries.
+This can be verified via the registry logs, e.g. `docker logs -f registry-docker.io`.
+The first time the cluster boots, images are pulled and cached, so the next cluster boot should be much faster.
+
+> Note: `10.5.0.1` is the bridge IP with the default network (`10.5.0.0/24`); if using a custom `--cidr`, the value should be adjusted accordingly.
+
+## Using Caching Registries with `docker` Local Cluster
+
+With a [docker](docker) local cluster we can use the Docker bridge IP, which defaults to `172.17.0.1`.
+On Linux, the Docker bridge address can be inspected with `ip addr show docker0`.
+
+```bash
+talosctl cluster create --provisioner docker \
+    --registry-mirror docker.io=http://172.17.0.1:5000 \
+    --registry-mirror k8s.gcr.io=http://172.17.0.1:5001 \
+    --registry-mirror quay.io=http://172.17.0.1:5002 \
+    --registry-mirror gcr.io=http://172.17.0.1:5003
+```
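+To check which images have been cached so far, the catalog endpoint of each proxy can be queried from the host (a sketch; ports as configured above):
+
+```bash
+# List repositories cached by the docker.io proxy (Docker Registry HTTP API v2).
+curl http://127.0.0.1:5000/v2/_catalog
+```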
+## Cleaning Up
+
+To clean up, run:
+
+```bash
+docker rm -f registry-docker.io
+docker rm -f registry-k8s.gcr.io
+docker rm -f registry-quay.io
+docker rm -f registry-gcr.io
+```
+
+> Note: Removing the Docker registry containers also removes the image cache.
+> So if you plan to keep using the caching registries, keep the containers running.
diff --git a/docs/website/content/v0.7/en/guides/metal/arges.md b/docs/website/content/v0.7/en/guides/metal/arges.md
new file mode 100644
index 000000000..f69a52bae
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/metal/arges.md
@@ -0,0 +1,16 @@
+---
+title: "Arges: Flexible Provisioning for Kubernetes"
+---
+
+> Arges is an alpha-level project and is in active development.
+> If you need help or have questions, please get in touch with us via Slack or GitHub!
+> If you want to skip right to the code, check it out on GitHub: [talos-systems/arges](https://github.com/talos-systems/arges)
+
+The goal of the Arges project is to provide Talos users with a robust and reliable way to build and manage bare metal Talos-based Kubernetes clusters, as well as manage cloud-based clusters.
+We've tried to achieve this by building out a set of tools to help solve the traditional datacenter bootstrapping problems.
+These tools include an asset management server, a metadata server, and a pair of Cluster API-aware providers for infrastructure provisioning and config generation.
+
+Since Arges is currently in active development, the best place to start will be the [project README](https://github.com/talos-systems/arges/blob/master/README.md).
+In the GitHub repository, you can find an [example project](https://github.com/talos-systems/arges/blob/master/examples/README.md) showing one method of deployment.
diff --git a/docs/website/content/v0.7/en/guides/metal/digitalrebar.md b/docs/website/content/v0.7/en/guides/metal/digitalrebar.md
new file mode 100644
index 000000000..14917f3b3
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/metal/digitalrebar.md
@@ -0,0 +1,158 @@
+---
+title: "Digital Rebar"
+---
+
+## Prerequisites
+
+- 3 nodes (please see [hardware requirements](https://www.talos.dev/docs/v0.7/en/guides/getting-started#system-requirements))
+- Loadbalancer
+- Digital Rebar Server
+- Talosctl access (see [talosctl setup](https://www.talos.dev/docs/v0.7/en/guides/getting-started/talosctl))
+
+## Creating a Cluster
+
+In this guide we will create a Kubernetes cluster with 1 worker node and 2 controlplane nodes.
+We assume an existing Digital Rebar deployment, and some familiarity with iPXE.
+
+We leave it up to the user to decide if they would like to use static networking or DHCP.
+The setup and configuration of DHCP will not be covered.
+
+### Create the Machine Configuration Files
+
+#### Generating Base Configurations
+
+Using the DNS name of the load balancer, generate the base configuration files for the Talos machines:
+
+```bash
+$ talosctl gen config talos-k8s-metal-tutorial https://<load balancer IP or DNS>:<port>
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+> The loadbalancer is used to distribute the load across multiple controlplane nodes.
+> This isn't covered in detail, because we assume some loadbalancing knowledge beforehand.
+> If you think this should be added to the docs, please [create an issue](https://github.com/talos-systems/talos/issues).
+
+At this point, you can modify the generated configs to your liking.
+
+#### Validate the Configuration Files
+
+```bash
+$ talosctl validate --config init.yaml --mode metal
+init.yaml is valid for metal mode
+$ talosctl validate --config controlplane.yaml --mode metal
+controlplane.yaml is valid for metal mode
+$ talosctl validate --config join.yaml --mode metal
+join.yaml is valid for metal mode
+```
+
+#### Publishing the Machine Configuration Files
+
+Digital Rebar has a built-in file server, which means we can use this feature to expose the Talos configuration files.
+We will place `init.yaml`, `controlplane.yaml`, and `worker.yaml` (the generated `join.yaml`, renamed to match the worker role used below) into the Digital Rebar file server by using the `drpcli` tools.
+
+Copy the generated files from the step above into your Digital Rebar installation.
+
+```bash
+drpcli file upload <file>.yaml as <file>.yaml
+```
+
+Replacing `<file>` with `init`, `controlplane`, or `worker`.
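+For example, assuming the default file names from the generation step (and renaming `join.yaml` to match the `worker` role), the uploads might look like:
+
+```bash
+drpcli file upload init.yaml as init.yaml
+drpcli file upload controlplane.yaml as controlplane.yaml
+# The role parameter below expects "worker", so join.yaml is published under that name.
+drpcli file upload join.yaml as worker.yaml
+```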
+
+### Download the boot files
+
+Download a recent version of `boot.tar.gz` from [GitHub](https://github.com/talos-systems/talos/releases/).
+
+Upload it to DRP:
+
+```bash
+$ drpcli isos upload boot.tar.gz as talos-0.7.tar.gz
+{
+  "Path": "talos-0.7.tar.gz",
+  "Size": 96470072
+}
+```
+
+We have some Digital Rebar [example files](https://github.com/talos-systems/talos/tree/master/hack/test/digitalrebar/) in the Git repo you can use to provision Digital Rebar with `drpcli`.
+
+To apply these configs you need to create them, and then apply them as follows:
+
+```bash
+$ drpcli bootenvs create talos-0.7
+{
+  "Available": true,
+  "BootParams": "",
+  "Bundle": "",
+  "Description": "",
+  "Documentation": "",
+  "Endpoint": "",
+  "Errors": [],
+  "Initrds": [],
+  "Kernel": "",
+  "Meta": {},
+  "Name": "talos-0.7",
+  "OS": {
+    "Codename": "",
+    "Family": "",
+    "IsoFile": "",
+    "IsoSha256": "",
+    "IsoUrl": "",
+    "Name": "",
+    "SupportedArchitectures": {},
+    "Version": ""
+  },
+  "OnlyUnknown": false,
+  "OptionalParams": [],
+  "ReadOnly": false,
+  "RequiredParams": [],
+  "Templates": [],
+  "Validated": true
+}
+```
+
+```bash
+drpcli bootenvs update talos-0.7 - < bootenv.yaml
+```
+
+> You need to do this for all the files in the example directory.
+> If you don't have access to the `drpcli` tools, you can also use the web interface.
+
+It's important that the bootenv has a SHA256 hash matching that of `boot.tar.gz`.
+
+#### Bootenv BootParams
+
+We're using some of Digital Rebar's built-in templating to make sure the machine gets the correct role assigned:
+
+`talos.platform=metal talos.config={{ .ProvisionerURL }}/files/{{.Param \"talos/role\"}}.yaml`
+
+This is why we also include a `params.yaml` in the example directory to make sure the role is set to one of the following:
+
+- controlplane
+- init
+- worker
+
+The `{{.Param \"talos/role\"}}` template then gets populated with one of the above roles.
+
+### Boot the Machines
+
+In the Digital Rebar UI, select the machines you want to provision.
+Once selected, you need to assign the following:
+
+- Profile
+- Workflow
+
+This will provision the Stage and Bootenv with the Talos values.
+Once this is done, you can boot the machine.
+
+To understand the boot process, we have a higher-level overview located at [metal overview](https://www.talos.dev/docs/v0.7/en/guides/metal/overview).
+
+### Retrieve the `kubeconfig`
+
+Once everything is running, we can retrieve the admin `kubeconfig` by running:
+
+```bash
+talosctl --talosconfig talosconfig config endpoint <control plane 1 IP>
+talosctl --talosconfig talosconfig kubeconfig .
+```
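+The retrieved `kubeconfig` is written to the current directory; as a quick check, it can be used directly (a sketch, assuming `kubectl` is installed):
+
+```bash
+# Verify that all nodes have registered with the cluster.
+kubectl --kubeconfig ./kubeconfig get nodes
+```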
diff --git a/docs/website/content/v0.7/en/guides/metal/matchbox.md b/docs/website/content/v0.7/en/guides/metal/matchbox.md
new file mode 100644
index 000000000..89744ee3b
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/metal/matchbox.md
@@ -0,0 +1,188 @@
+---
+title: Matchbox
+---
+
+## Creating a Cluster
+
+In this guide we will create an HA Kubernetes cluster with 3 worker nodes.
+We assume an existing load balancer, Matchbox deployment, and some familiarity with iPXE.
+
+We leave it up to the user to decide if they would like to use static networking or DHCP.
+The setup and configuration of DHCP will not be covered.
+
+### Create the Machine Configuration Files
+
+#### Generating Base Configurations
+
+Using the DNS name of the load balancer, generate the base configuration files for the Talos machines:
+
+```bash
+$ talosctl gen config talos-k8s-metal-tutorial https://<load balancer IP or DNS>:<port>
+created init.yaml
+created controlplane.yaml
+created join.yaml
+created talosconfig
+```
+
+At this point, you can modify the generated configs to your liking.
+
+#### Validate the Configuration Files
+
+```bash
+$ talosctl validate --config init.yaml --mode metal
+init.yaml is valid for metal mode
+$ talosctl validate --config controlplane.yaml --mode metal
+controlplane.yaml is valid for metal mode
+$ talosctl validate --config join.yaml --mode metal
+join.yaml is valid for metal mode
+```
+
+#### Publishing the Machine Configuration Files
+
+In bare-metal setups it is up to the user to provide the configuration files over HTTP(S).
+A special kernel parameter (`talos.config`) must be used to inform Talos about _where_ it should retrieve its configuration file.
+To keep things simple, we will place `init.yaml`, `controlplane.yaml`, and `join.yaml` into Matchbox's `assets` directory.
+This directory is automatically served by Matchbox.
+
+### Create the Matchbox Configuration Files
+
+The profiles we will create will reference `vmlinuz` and `initramfs.xz`.
+Download these files from the [release](https://github.com/talos-systems/talos/releases) of your choice, and place them in `/var/lib/matchbox/assets`.
+
+#### Profiles
+
+##### The Bootstrap Node
+
+```json
+{
+  "id": "init",
+  "name": "init",
+  "boot": {
+    "kernel": "/assets/vmlinuz",
+    "initrd": ["/assets/initramfs.xz"],
+    "args": [
+      "initrd=initramfs.xz",
+      "page_poison=1",
+      "slab_nomerge",
+      "slub_debug=P",
+      "pti=on",
+      "console=tty0",
+      "console=ttyS0",
+      "printk.devkmsg=on",
+      "talos.platform=metal",
+      "talos.config=http://matchbox.talos.dev/assets/init.yaml"
+    ]
+  }
+}
+```
+
+> Note: Be sure to change `http://matchbox.talos.dev` to the endpoint of your Matchbox server.
+
+##### Additional Control Plane Nodes
+
+```json
+{
+  "id": "control-plane",
+  "name": "control-plane",
+  "boot": {
+    "kernel": "/assets/vmlinuz",
+    "initrd": ["/assets/initramfs.xz"],
+    "args": [
+      "initrd=initramfs.xz",
+      "page_poison=1",
+      "slab_nomerge",
+      "slub_debug=P",
+      "pti=on",
+      "console=tty0",
+      "console=ttyS0",
+      "printk.devkmsg=on",
+      "talos.platform=metal",
+      "talos.config=http://matchbox.talos.dev/assets/controlplane.yaml"
+    ]
+  }
+}
+```
+
+##### Worker Nodes
+
+```json
+{
+  "id": "default",
+  "name": "default",
+  "boot": {
+    "kernel": "/assets/vmlinuz",
+    "initrd": ["/assets/initramfs.xz"],
+    "args": [
+      "initrd=initramfs.xz",
+      "page_poison=1",
+      "slab_nomerge",
+      "slub_debug=P",
+      "pti=on",
+      "console=tty0",
+      "console=ttyS0",
+      "printk.devkmsg=on",
+      "talos.platform=metal",
+      "talos.config=http://matchbox.talos.dev/assets/join.yaml"
+    ]
+  }
+}
+```
+
+#### Groups
+
+Now, create the following groups, and ensure that the `selector`s are accurate for your specific setup.
+
+```json
+{
+  "id": "control-plane-1",
+  "name": "control-plane-1",
+  "profile": "init",
+  "selector": {
+    ...
+  }
+}
+```
+
+```json
+{
+  "id": "control-plane-2",
+  "name": "control-plane-2",
+  "profile": "control-plane",
+  "selector": {
+    ...
+  }
+}
+```
+
+```json
+{
+  "id": "control-plane-3",
+  "name": "control-plane-3",
+  "profile": "control-plane",
+  "selector": {
+    ...
+  }
+}
+```
+
+```json
+{
+  "id": "default",
+  "name": "default",
+  "profile": "default"
+}
+```
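+As an illustration, a group that pins a specific machine typically selects on an attribute Matchbox knows about, such as the machine's MAC address (the value below is a hypothetical example):
+
+```json
+{
+  "id": "control-plane-1",
+  "name": "control-plane-1",
+  "profile": "init",
+  "selector": {
+    "mac": "52:54:00:a1:9c:ae"
+  }
+}
+```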
+### Boot the Machines
+
+Now that we have our configuration files in place, boot all the machines.
+Talos will come up on each machine, grab its configuration file, and bootstrap itself.
+
+### Retrieve the `kubeconfig`
+
+At this point we can retrieve the admin `kubeconfig` by running:
+
+```bash
+talosctl --talosconfig talosconfig config endpoint <load balancer IP or DNS>
+talosctl --talosconfig talosconfig kubeconfig .
+```
diff --git a/docs/website/content/v0.7/en/guides/metal/overview.md b/docs/website/content/v0.7/en/guides/metal/overview.md
new file mode 100644
index 000000000..7a3ca44c3
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/metal/overview.md
@@ -0,0 +1,34 @@
+---
+title: Deploying Talos on Bare Metal
+---
+
+In this section we will show how you can set up Talos in bare-metal environments.
+Any tool that supports PXE booting can be used to deploy Talos.
+We have documented Talos working with several provisioning tools here:
+
+- [Arges](arges) is a new project by Talos Systems designed to provide Talos users with a robust and reliable way to build and manage bare metal Talos-based Kubernetes clusters.
+Arges uses Cluster API for a consistent experience, and supports cloud platforms as well as bare metal.
+- [Matchbox](matchbox) from Red Hat/CoreOS is a service that matches machines to profiles to PXE boot, and can be used to provision Talos clusters.
+
+## Generic Information
+
+### High level overview
+
+Below is an image visualizing the process of bootstrapping nodes.
+
+### Kernel Parameters
+
+The following is a list of kernel parameters you will need to set:
+
+- `talos.config` (required): the HTTP(S) URL at which the machine configuration data can be found
+- `talos.platform` (required): should be `metal` for bare-metal installs
+
+Talos also enforces some minimum requirements from the KSPP (Kernel Self Protection Project).
+The following parameters are required:
+
+- `page_poison=1`
+- `slab_nomerge`
+- `slub_debug=P`
+- `pti=on`
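+Putting these together, a complete set of kernel arguments might look like the following (the config URL is a hypothetical example):
+
+```bash
+talos.platform=metal talos.config=http://example.internal/assets/init.yaml page_poison=1 slab_nomerge slub_debug=P pti=on
+```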
diff --git a/docs/website/content/v0.7/en/guides/upgrading/index.md b/docs/website/content/v0.7/en/guides/upgrading/index.md
new file mode 100644
index 000000000..a998eaf2f
--- /dev/null
+++ b/docs/website/content/v0.7/en/guides/upgrading/index.md
@@ -0,0 +1,277 @@
+---
+title: Upgrading
+---
+
+## Talos
+
+In an effort to create more production-ready clusters, Talos will now taint control plane nodes as unschedulable.
+This means that any application you might have deployed must tolerate this taint if you intend to run the application on control plane nodes.
+
+Another feature you will notice is the automatic uncordoning of nodes that have been upgraded.
+Talos will now uncordon a node if the cordon was initiated by the upgrade process.
+
+## Talosctl
+
+The `talosctl` CLI now requires an explicit set of nodes.
+This can be configured with `talosctl config nodes` or set on the fly with `talosctl --nodes`.
+
+## Kubernetes
+
+In Kubernetes 1.19, the official `hyperkube` image was removed.
+This means that in order to upgrade Kubernetes, Talos users will have to change the `command` and `image` fields of each control plane component.
+The `kubelet` image will also have to be updated, if you wish to specify the `kubelet` image explicitly.
+The default used by Talos is sufficient in most cases.
+
+In order to edit the control plane, we will need a working `kubectl` config.
+If you don't already have one, you can get one by running:
+
+```bash
+talosctl --nodes <master node> kubeconfig
+```
+
+### Automated Kubernetes Upgrade
+
+In Talos v0.6.1 we introduced the `upgrade-k8s` command in `talosctl`.
+This command can be used to automate the Kubernetes upgrade process.
+For example, to upgrade from Kubernetes v1.18.6 to v1.19.0, run:
+
+```bash
+$ talosctl --nodes <master node> upgrade-k8s --from 1.18.6 --to 1.19.0
+updating pod-checkpointer grace period to "0m"
+sleeping 5m0s to let the pod-checkpointer self-checkpoint be updated
+temporarily taking "kube-apiserver" out of pod-checkpointer control
+updating daemonset "kube-apiserver" to version "1.19.0"
+updating daemonset "kube-controller-manager" to version "1.19.0"
+updating daemonset "kube-scheduler" to version "1.19.0"
+updating daemonset "kube-proxy" to version "1.19.0"
+updating pod-checkpointer grace period to "5m0s"
+```
+
+### Manual Kubernetes Upgrade
+
+Kubernetes can be upgraded manually as well by following the steps outlined below.
+They are equivalent to the steps performed by the `talosctl upgrade-k8s` command.
+
+#### pod-checkpointer
+
+Talos runs the `pod-checkpointer` component, which helps recover control plane components (specifically, the API server) if the control plane is not healthy.
+
+However, the way checkpoints interact with the API server upgrade may make an upgrade take a lot longer due to a race condition on the API server listen port.
+
+In order to speed up upgrades, first lower the `pod-checkpointer` grace period to zero (`kubectl -n kube-system edit daemonset pod-checkpointer`), change:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: pod-checkpointer
+          command:
+            ...
+            - --checkpoint-grace-period=5m0s
+```
+
+to:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: pod-checkpointer
+          command:
+            ...
+            - --checkpoint-grace-period=0s
+```
+
+Wait for 5 minutes to let `pod-checkpointer` update its self-checkpoint to the new grace period.
+
+#### API Server
+
+In the API server's `DaemonSet`, change:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-apiserver
+          image: ...
+          command:
+            - ./hyperkube
+            - kube-apiserver
+```
+
+to:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-apiserver
+          image: k8s.gcr.io/kube-apiserver:v1.19.0
+          command:
+            - /go-runner
+            - /usr/local/bin/kube-apiserver
+```
+
+To edit the `DaemonSet`, run:
+
+```bash
+kubectl edit daemonsets -n kube-system kube-apiserver
+```
+
+#### Controller Manager
+
+In the controller manager's `DaemonSet`, change:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-controller-manager
+          image: ...
+          command:
+            - ./hyperkube
+            - kube-controller-manager
+```
+
+to:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-controller-manager
+          image: k8s.gcr.io/kube-controller-manager:v1.19.0
+          command:
+            - /go-runner
+            - /usr/local/bin/kube-controller-manager
+```
+
+To edit the `DaemonSet`, run:
+
+```bash
+kubectl edit daemonsets -n kube-system kube-controller-manager
+```
+
+#### Scheduler
+
+In the scheduler's `DaemonSet`, change:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-scheduler
+          image: ...
+          command:
+            - ./hyperkube
+            - kube-scheduler
+```
+
+to:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: kube-scheduler
+          image: k8s.gcr.io/kube-scheduler:v1.19.0
+          command:
+            - /go-runner
+            - /usr/local/bin/kube-scheduler
+```
+
+To edit the `DaemonSet`, run:
+
+```bash
+kubectl edit daemonsets -n kube-system kube-scheduler
+```
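+After editing each `DaemonSet`, you can confirm that the new image is in place (a sketch, shown here for the scheduler; the same query works for the other components):
+
+```bash
+# Print the image currently configured for the kube-scheduler DaemonSet.
+kubectl -n kube-system get daemonset kube-scheduler \
+    -o jsonpath='{.spec.template.spec.containers[0].image}'
+```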
+#### Restoring pod-checkpointer
+
+Restore the grace period of 5 minutes (`kubectl -n kube-system edit daemonset pod-checkpointer`), change:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: pod-checkpointer
+          command:
+            ...
+            - --checkpoint-grace-period=0s
+```
+
+to:
+
+```yaml
+kind: DaemonSet
+...
+spec:
+  ...
+  template:
+    ...
+    spec:
+      containers:
+        - name: pod-checkpointer
+          command:
+            ...
+            - --checkpoint-grace-period=5m0s
+```
+
+### Kubelet
+
+The Talos team now maintains an image for the `kubelet` that should be used starting with Kubernetes 1.19.
+The image for this release is `docker.io/autonomy/kubelet:v1.19.0`.
+To explicitly set the image, we can use the [official documentation](https://www.talos.dev/docs/v0.7/en/configuration/v1alpha1#kubelet).
+For example:
+
+```yaml
+machine:
+  ...
+  kubelet:
+    image: docker.io/autonomy/kubelet:v1.19.0
+```
diff --git a/docs/website/content/v0.7/en/troubleshooting/machine-reset.md b/docs/website/content/v0.7/en/troubleshooting/machine-reset.md
new file mode 100644
index 000000000..24db8d75d
--- /dev/null
+++ b/docs/website/content/v0.7/en/troubleshooting/machine-reset.md
@@ -0,0 +1,21 @@
+---
+title: 'Machine Reset'
+---
+
+From time to time, it may be beneficial to reset a Talos machine to its "original" state.
+Bear in mind that this is a destructive action for the given machine.
+Doing this means removing the machine from Kubernetes and etcd (if applicable), and clearing any data on the machine that would normally persist across a reboot.
+
+The API command for doing this is `talosctl reset`.
+There are a couple of flags as part of this command:
+
+```bash
+Flags:
+      --graceful   if true, attempt to cordon/drain node and leave etcd (if applicable) (default true)
+      --reboot     if true, reboot the node after resetting instead of shutting down
+```
+
+The `graceful` flag is especially important when considering HA vs. non-HA Talos clusters.
+If the machine is part of an HA cluster, a normal, graceful reset should work just fine right out of the box as long as the cluster is in a good state.
+However, if this is a single-node cluster being used for testing purposes, a graceful reset is not an option, since etcd cannot be "left" if there is only a single member.
+In this case, the reset should be used with `--graceful=false` to skip performing checks that would normally block the reset.
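+For example, to wipe a single-node test machine and have it reboot after the reset (the node IP is a placeholder):
+
+```bash
+talosctl --nodes <node IP> reset --graceful=false --reboot
+```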
diff --git a/docs/website/content/v0.7/en/troubleshooting/overview.md b/docs/website/content/v0.7/en/troubleshooting/overview.md
new file mode 100644
index 000000000..b90c433ad
--- /dev/null
+++ b/docs/website/content/v0.7/en/troubleshooting/overview.md
@@ -0,0 +1,5 @@
+---
+title: 'Troubleshooting'
+---
+
+In this section we will provide guidance on troubleshooting various scenarios you may find yourself in.
diff --git a/docs/website/content/v0.7/en/troubleshooting/pki.md b/docs/website/content/v0.7/en/troubleshooting/pki.md
new file mode 100644
index 000000000..374aae905
--- /dev/null
+++ b/docs/website/content/v0.7/en/troubleshooting/pki.md
@@ -0,0 +1,48 @@
+---
+title: 'PKI'
+---
+
+## Generating an Administrator Key Pair
+
+In order to create a key pair, you will need the root CA.
+
+Save the CA public key and CA private key as `ca.crt` and `ca.key`, respectively.
+Now, run the following commands to generate a certificate:
+
+```bash
+talosctl gen key --name admin
+talosctl gen csr --key admin.key --ip 127.0.0.1
+talosctl gen crt --ca ca --csr admin.csr --name admin
+```
+
+Now, base64-encode `admin.crt` and `admin.key`:
+
+```bash
+cat admin.crt | base64
+cat admin.key | base64
+```
+
+You can now set the `crt` and `key` fields in the `talosconfig` to the base64-encoded strings.
+
+## Renewing an Expired Administrator Certificate
+
+In order to renew the certificate, you will need the root CA and the admin private key.
+The base64-encoded key can be found in the configuration file of any one of the control plane nodes.
+Exactly where it is will depend on the specific version of the configuration file you are using.
+
+Save the CA public key, CA private key, and admin private key as `ca.crt`, `ca.key`, and `admin.key`, respectively.
+Now, run the following commands to generate a certificate:
+
+```bash
+talosctl gen csr --key admin.key --ip 127.0.0.1
+talosctl gen crt --ca ca --csr admin.csr --name admin
+```
+
+You should see `admin.crt` in your current directory.
+Now, base64-encode `admin.crt`:
+
+```bash
+cat admin.crt | base64
+```
+
+You can now set the certificate in the `talosconfig` to the base64-encoded string.
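+For reference, the relevant fields live under your context in the `talosconfig` (a sketch; the context name and endpoint are placeholders):
+
+```yaml
+context: <context name>
+contexts:
+  <context name>:
+    endpoints:
+      - <control plane IP or DNS>
+    ca: <base64-encoded ca.crt>
+    crt: <base64-encoded admin.crt>
+    key: <base64-encoded admin.key>
+```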