talos/internal/app/machined/pkg/controllers/runtime/diagnostics_logger.go
Andrey Smirnov 8dbe2128a9
feat: implement Talos diagnostics
Talos diagnostics analyzes the current system state and produces detailed
warnings about system misconfiguration which might be tricky to figure
out any other way.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
2024-06-05 22:28:15 +04:00

115 lines
3.1 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package runtime
import (
"context"
"fmt"
"time"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"go.uber.org/zap"
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)
// DiagnosticsLoggerController logs warnings generated by DiagnosticsController.
//
// It takes Diagnostic resources as its input and writes them to the logger
// passed to Run. The type carries no fields.
type DiagnosticsLoggerController struct{}
// Name returns the name of the controller.
//
// It implements controller.Controller interface.
func (ctrl *DiagnosticsLoggerController) Name() string {
	const controllerName = "runtime.DiagnosticsLoggerController"

	return controllerName
}
// Inputs declares the single weak input of the controller: Diagnostic
// resources in the runtime namespace.
//
// It implements controller.Controller interface.
func (ctrl *DiagnosticsLoggerController) Inputs() []controller.Input {
	diagnostics := controller.Input{
		Namespace: runtime.NamespaceName,
		Type:      runtime.DiagnosticType,
		Kind:      controller.InputWeak,
	}

	return []controller.Input{diagnostics}
}
// Outputs declares no outputs: the returned slice is always nil.
//
// It implements controller.Controller interface.
func (ctrl *DiagnosticsLoggerController) Outputs() []controller.Output {
	var none []controller.Output

	return none
}
// diagnosticsReportInterval is the period of the ticker used by Run to log
// diagnostics again as "still active".
const diagnosticsReportInterval = 5 * time.Minute
// Run implements controller.Controller interface.
//
// It keeps an in-memory set of diagnostic IDs which were already logged and
// wakes up on two kinds of signals:
//
//   - a runtime event: every listed diagnostic which is not in the set yet is
//     logged as "new diagnostic" and remembered; every remembered ID which is
//     no longer listed is logged as "diagnostic resolved" and forgotten;
//   - a tick of the diagnosticsReportInterval ticker: if the set is not empty,
//     every currently listed diagnostic is logged as "diagnostic still active".
//
// Run returns nil once ctx is done, and a wrapped error if listing the
// diagnostics fails.
//
//nolint:gocyclo
func (ctrl *DiagnosticsLoggerController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
	// describe builds the structured fields attached to every warning line.
	describe := func(diag *runtime.Diagnostic) []zap.Field {
		id := diag.Metadata().ID()
		spec := diag.TypedSpec()

		return []zap.Field{
			zap.String("id", id),
			zap.String("message", spec.Message),
			zap.Strings("details", spec.Details),
			zap.String("url", spec.DocumentationURL(id)),
		}
	}

	// announced holds the IDs which were logged and have not been resolved yet.
	announced := make(map[string]struct{})

	reminder := time.NewTicker(diagnosticsReportInterval)
	defer reminder.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.EventCh():
			list, err := safe.ReaderListAll[*runtime.Diagnostic](ctx, r)
			if err != nil {
				return fmt.Errorf("error listing diagnostics: %w", err)
			}

			// active collects the IDs present in this listing, so that
			// vanished diagnostics can be detected below.
			active := make(map[string]struct{})

			iter := list.Iterator()
			for iter.Next() {
				diag := iter.Value()
				id := diag.Metadata().ID()

				active[id] = struct{}{}

				if _, known := announced[id]; known {
					continue
				}

				logger.Warn("new diagnostic", describe(diag)...)

				announced[id] = struct{}{}
			}

			for id := range announced {
				if _, present := active[id]; present {
					continue
				}

				logger.Info("diagnostic resolved", zap.String("id", id))
				delete(announced, id)
			}
		case <-reminder.C:
			if len(announced) == 0 {
				// Nothing was reported, so there is nothing to repeat; this
				// jumps straight to the next select iteration.
				continue
			}

			list, err := safe.ReaderListAll[*runtime.Diagnostic](ctx, r)
			if err != nil {
				return fmt.Errorf("error listing diagnostics: %w", err)
			}

			iter := list.Iterator()
			for iter.Next() {
				logger.Warn("diagnostic still active", describe(iter.Value())...)
			}
		}

		r.ResetRestartBackoff()
	}
}