Andrey Smirnov 2bf8540855 test: provision Talos clusters via Firecracker VMs
This is the initial PR pushing the initial code; it has several known
problems which will be addressed in follow-up PRs:

1. there's no "cluster destroy", so the only way to stop the VMs is to
`pkill firecracker`

2. the provisioner creates state in `/tmp` and never deletes it; keeping
that state is required so the cluster keeps running after `osctl cluster
create` finishes

3. there is no controller process running around firecracker to support
reboots/CNI cleanup (vethxyz interfaces linger on the host as they're
never cleaned up)

The plan is to create some structure in `~/.talos` to manage cluster
state, e.g. `~/.talos/clusters/<name>`, which will contain all the
required files (disk images, file sockets, VM logs, etc.). This
directory structure will also serve as a way to detect running clusters
and clean them up.
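
To make that concrete, here is a minimal sketch of how the state
directory could be resolved; the helper name and package placement are
hypothetical and not part of this PR:

package provision // hypothetical placement

import (
	"os"
	"path/filepath"
)

// clusterStateDir returns ~/.talos/clusters/<name>, creating it if needed;
// disk images, file sockets, VM logs, etc. would live under this directory.
func clusterStateDir(name string) (string, error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}

	dir := filepath.Join(home, ".talos", "clusters", name)

	return dir, os.MkdirAll(dir, 0755)
}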

For point number 3, `osctl cluster create` is going to exec a lightweight
process to control the firecracker VM process and to simulate VM reboots
when firecracker exits cleanly (which is what happens when the VM reboots).
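
As a rough sketch (names and error handling are illustrative only), the
controller loop could look like this: a clean firecracker exit means the
guest requested a reboot, so the VM is started again, while an error is
propagated so the caller can clean up CNI state:

package supervisor // hypothetical placement

import (
	"context"
	"os/exec"
)

// superviseVM restarts the firecracker process as long as it exits cleanly
// (a clean exit is how firecracker signals a guest reboot).
func superviseVM(ctx context.Context, firecrackerPath string, args []string) error {
	for {
		cmd := exec.CommandContext(ctx, firecrackerPath, args...)

		if err := cmd.Run(); err != nil {
			// crash or kill: report back so CNI state can be cleaned up
			return err
		}

		// stop looping once the controller itself is being shut down
		if ctx.Err() != nil {
			return ctx.Err()
		}
	}
}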

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
2020-01-16 00:27:08 +03:00


// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package firecracker

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io/ioutil"
	"time"

	"github.com/talos-systems/talos/internal/pkg/provision"
	"github.com/talos-systems/talos/internal/pkg/provision/providers/firecracker/inmemhttp"
)
// Create Talos cluster as a set of firecracker micro-VMs.
//
//nolint: gocyclo
func (p *provisioner) Create(ctx context.Context, request provision.ClusterRequest, opts ...provision.Option) (provision.Cluster, error) {
	options := provision.DefaultOptions()

	for _, opt := range opts {
		if err := opt(&options); err != nil {
			return nil, err
		}
	}
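
	// state is returned as the provision.Cluster result; it accumulates
	// everything created below (bridge name, config URL, temp dir, node info)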
	state := &state{}

	fmt.Fprintln(options.LogWriter, "creating network", request.Network.Name)

	// build bridge interface name by taking part of checksum of the network name
	// so that interface name is defined by network name, and different networks have
	// different bridge interfaces
	networkNameHash := sha256.Sum256([]byte(request.Network.Name))
	state.bridgeInterfaceName = fmt.Sprintf("%s%s", "talos", hex.EncodeToString(networkNameHash[:])[:8])

	if err := p.createNetwork(ctx, state, request.Network); err != nil {
		return nil, fmt.Errorf("unable to provision CNI network: %w", err)
	}
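
	// serve machine configs over an in-memory HTTP server bound to the bridge
	// gateway address (":0" lets the kernel pick a free port)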
	httpServer, err := inmemhttp.NewServer(fmt.Sprintf("%s:0", request.Network.GatewayAddr))
	if err != nil {
		return nil, err
	}

	for _, node := range request.Nodes {
		var cfg string

		cfg, err = node.Config.String()
		if err != nil {
			return nil, err
		}

		if err = httpServer.AddFile(fmt.Sprintf("%s.yaml", node.Name), []byte(cfg)); err != nil {
			return nil, err
		}
	}
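
	// each node's config is published as "<node name>.yaml" under this base URL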
	state.baseConfigURL = fmt.Sprintf("http://%s/", httpServer.GetAddr())

	httpServer.Serve()
	defer httpServer.Shutdown(ctx) //nolint: errcheck
	state.tempDir, err = ioutil.TempDir("", "talos")
	if err != nil {
		return nil, err
	}

	fmt.Fprintf(options.LogWriter, "created temporary environment in %q\n", state.tempDir)

	var nodeInfo []provision.NodeInfo

	fmt.Fprintln(options.LogWriter, "creating master nodes")

	if nodeInfo, err = p.createNodes(ctx, state, request, request.Nodes.MasterNodes()); err != nil {
		return nil, err
	}

	fmt.Fprintln(options.LogWriter, "creating worker nodes")

	var workerNodeInfo []provision.NodeInfo

	if workerNodeInfo, err = p.createNodes(ctx, state, request, request.Nodes.WorkerNodes()); err != nil {
		return nil, err
	}

	nodeInfo = append(nodeInfo, workerNodeInfo...)

	// TODO: temporary, need to wait for all nodes to finish bootstrapping
	// before shutting down config HTTP service
	time.Sleep(30 * time.Second)
	state.clusterInfo = provision.ClusterInfo{
		ClusterName: request.Name,
		Network: provision.NetworkInfo{
			Name: request.Network.Name,
			CIDR: request.Network.CIDR,
		},
		Nodes: nodeInfo,
	}

	return state, nil
}