mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-09 22:51:12 +02:00
Network probes are configured with the specs, and provide their output as a status. At the moment only platform code can configure network probes. If any network probes are configured, they affect network.Status 'Connectivity' flag. Example, create the probe: ``` talosctl -n 172.20.0.3 meta write 0xa '{"probes": [{"interval": "1s", "tcp": {"endpoint": "google.com:80", "timeout": "10s"}}]}' ``` Watch probe status: ``` $ talosctl -n 172.20.0.3 get probe NODE NAMESPACE TYPE ID VERSION SUCCESS 172.20.0.3 network ProbeStatus tcp:google.com:80 5 true ``` With failing probes: ``` $ talosctl -n 172.20.0.3 get probe NODE NAMESPACE TYPE ID VERSION SUCCESS 172.20.0.3 network ProbeStatus tcp:google.com:80 4 true 172.20.0.3 network ProbeStatus tcp:google.com:81 1 false $ talosctl -n 172.20.0.3 get networkstatus NODE NAMESPACE TYPE ID VERSION ADDRESS CONNECTIVITY HOSTNAME ETC 172.20.0.3 network NetworkStatus status 5 true true true true ``` Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
157 lines
4.4 KiB
Go
157 lines
4.4 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
package network
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/cosi-project/runtime/pkg/controller"
|
|
"github.com/cosi-project/runtime/pkg/resource"
|
|
"github.com/cosi-project/runtime/pkg/safe"
|
|
"github.com/cosi-project/runtime/pkg/state"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/siderolabs/talos/internal/app/machined/pkg/controllers/network/internal/probe"
|
|
"github.com/siderolabs/talos/pkg/machinery/resources/network"
|
|
)
|
|
|
|
// ProbeController runs network probes configured with ProbeSpecs and outputs ProbeStatuses.
|
|
type ProbeController struct {
|
|
runners map[string]*probe.Runner
|
|
}
|
|
|
|
// Name implements controller.Controller interface.
|
|
func (ctrl *ProbeController) Name() string {
|
|
return "network.ProbeController"
|
|
}
|
|
|
|
// Inputs implements controller.Controller interface.
|
|
func (ctrl *ProbeController) Inputs() []controller.Input {
|
|
return []controller.Input{
|
|
{
|
|
Namespace: network.NamespaceName,
|
|
Type: network.ProbeSpecType,
|
|
Kind: controller.InputWeak,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Outputs implements controller.Controller interface.
|
|
func (ctrl *ProbeController) Outputs() []controller.Output {
|
|
return []controller.Output{
|
|
{
|
|
Type: network.ProbeStatusType,
|
|
Kind: controller.OutputExclusive,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Run implements controller.Controller interface.
|
|
func (ctrl *ProbeController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
|
|
notifyCh := make(chan probe.Notification)
|
|
|
|
ctrl.runners = make(map[string]*probe.Runner)
|
|
|
|
defer func() {
|
|
for _, runner := range ctrl.runners {
|
|
runner.Stop()
|
|
}
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil
|
|
case <-r.EventCh():
|
|
if err := ctrl.reconcileRunners(ctx, r, logger, notifyCh); err != nil {
|
|
return err
|
|
}
|
|
case ev := <-notifyCh:
|
|
if err := ctrl.reconcileOutputs(ctx, r, ev); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
r.ResetRestartBackoff()
|
|
}
|
|
}
|
|
|
|
//nolint:gocyclo
|
|
func (ctrl *ProbeController) reconcileRunners(ctx context.Context, r controller.Runtime, logger *zap.Logger, notifyCh chan<- probe.Notification) error {
|
|
specList, err := safe.ReaderList[*network.ProbeSpec](ctx, r, resource.NewMetadata(network.NamespaceName, network.ProbeSpecType, "", resource.VersionUndefined))
|
|
if err != nil {
|
|
return fmt.Errorf("error listing probe specs: %w", err)
|
|
}
|
|
|
|
// figure out which operators should run
|
|
shouldRun := make(map[string]network.ProbeSpecSpec)
|
|
|
|
for iter := safe.IteratorFromList(specList); iter.Next(); {
|
|
shouldRun[iter.Value().Metadata().ID()] = *iter.Value().TypedSpec()
|
|
}
|
|
|
|
// stop running probes which shouldn't run
|
|
for id := range ctrl.runners {
|
|
if _, exists := shouldRun[id]; !exists {
|
|
logger.Debug("stopping probe", zap.String("probe", id))
|
|
|
|
ctrl.runners[id].Stop()
|
|
delete(ctrl.runners, id)
|
|
} else if !shouldRun[id].Equal(ctrl.runners[id].Spec) {
|
|
logger.Debug("replacing probe", zap.String("probe", id))
|
|
|
|
ctrl.runners[id].Stop()
|
|
delete(ctrl.runners, id)
|
|
}
|
|
}
|
|
|
|
// start probes which aren't running
|
|
for id := range shouldRun {
|
|
if _, exists := ctrl.runners[id]; !exists {
|
|
ctrl.runners[id] = &probe.Runner{
|
|
ID: id,
|
|
Spec: shouldRun[id],
|
|
}
|
|
|
|
logger.Debug("starting probe", zap.String("probe", id))
|
|
ctrl.runners[id].Start(ctx, notifyCh, logger)
|
|
}
|
|
}
|
|
|
|
// clean up statuses which should no longer exist
|
|
statusList, err := safe.ReaderList[*network.ProbeStatus](ctx, r, resource.NewMetadata(network.NamespaceName, network.ProbeStatusType, "", resource.VersionUndefined))
|
|
if err != nil {
|
|
return fmt.Errorf("error listing probe statuses: %w", err)
|
|
}
|
|
|
|
for iter := safe.IteratorFromList(statusList); iter.Next(); {
|
|
if _, exists := shouldRun[iter.Value().Metadata().ID()]; exists {
|
|
continue
|
|
}
|
|
|
|
if err = r.Destroy(ctx, iter.Value().Metadata()); err != nil && !state.IsNotFoundError(err) {
|
|
return fmt.Errorf("error destroying probe status: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
//nolint:gocyclo,cyclop
|
|
func (ctrl *ProbeController) reconcileOutputs(ctx context.Context, r controller.Runtime, ev probe.Notification) error {
|
|
if _, exists := ctrl.runners[ev.ID]; !exists {
|
|
// probe was already removed, late notification, ignore it
|
|
return nil
|
|
}
|
|
|
|
return safe.WriterModify(ctx, r, network.NewProbeStatus(network.NamespaceName, ev.ID),
|
|
func(status *network.ProbeStatus) error {
|
|
*status.TypedSpec() = ev.Status
|
|
|
|
return nil
|
|
})
|
|
}
|