Andrey Smirnov bf4c81e7da
feat: kernel log (kmsg) delivery controller
This controller is enabled when the `talos.logging.kernel=` kernel arg is
passed. It will try to send logs to the endpoint as JSON-over-TCP (or
JSON-over-UDP, depending on the endpoint) as structured messages.

Example (from test implementation):

```
2021-11-26T19:53:21.912+0300	INFO	siderolink-agent/log_receiver.go:23	kernel log message	{"src_address": "fdae:41e4:649b:9303:680a:dfab:f7fa:ea00", "msg": {"clock":6252819,"facility":"user","msg":"[talos] task startAllServices (1/1): waiting for 6 services\n","priority":"warning","seq":711,"talos-level":"warn","talos-time":"2021-11-26T16:53:21.3258698Z"}}
```

Fixes #4455

See also https://github.com/talos-systems/siderolink/pull/4

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
2021-11-29 17:32:35 +03:00

183 lines
4.6 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package runtime
import (
"context"
"fmt"
"net/url"
"time"
"github.com/AlekSi/pointer"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/talos-systems/go-kmsg"
"github.com/talos-systems/go-procfs/procfs"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/logging"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/machinery/resources/network"
)
// drainTimeout is how long Run waits, once a drain has started, for another
// kernel message before treating the kernel log as drained.
const drainTimeout = 100 * time.Millisecond
// KmsgLogDeliveryController reads kernel log (kmsg) messages and forwards them
// to the log destination configured with the constants.KernelParamLoggingKernel
// kernel argument.
type KmsgLogDeliveryController struct {
	// Cmdline is the kernel command line consulted for the log destination;
	// Run returns immediately when it is nil or lacks the logging argument.
	Cmdline *procfs.Cmdline
	// Drainer is the source of the drain subscription used by Run.
	Drainer *runtime.Drainer
	// drainSub is the drain subscription; Run creates it once it is ready to
	// deliver logs and reuses it if it is already set.
	drainSub *runtime.DrainSubscription
}
// Name implements controller.Controller interface.
//
// It reports the fixed identifier of this controller.
func (ctrl *KmsgLogDeliveryController) Name() string {
	const controllerName = "runtime.KmsgLogDeliveryController"

	return controllerName
}
// Inputs implements controller.Controller interface.
//
// The controller has a single weak input: the network status resource.
func (ctrl *KmsgLogDeliveryController) Inputs() []controller.Input {
	networkStatus := controller.Input{
		Namespace: network.NamespaceName,
		Type:      network.StatusType,
		ID:        pointer.ToString(network.StatusID),
		Kind:      controller.InputWeak,
	}

	return []controller.Input{networkStatus}
}
// Outputs implements controller.Controller interface.
//
// The controller declares no outputs, so a nil slice is returned.
func (ctrl *KmsgLogDeliveryController) Outputs() []controller.Output {
	var none []controller.Output

	return none
}
// Run implements controller.Controller interface.
//
// Run returns immediately unless the kernel command line carries the
// constants.KernelParamLoggingKernel argument. Otherwise it waits until the
// network status resource reports a ready address, then follows the kernel
// log and sends every message to the destination URL taken from that argument.
//
// When a drain event is received, Run keeps forwarding messages until no new
// message arrives within drainTimeout, then cancels the drain subscription and
// returns nil. It also returns nil when ctx is cancelled. Failures to read the
// network status, parse the destination, read kernel messages or send a log
// event are returned as wrapped errors.
//
//nolint:gocyclo,cyclop
func (ctrl *KmsgLogDeliveryController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) (err error) {
	if ctrl.Cmdline == nil || ctrl.Cmdline.Get(constants.KernelParamLoggingKernel).First() == nil {
		return nil
	}

	// wait for the network to report a ready address before delivering anything
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.EventCh():
		}

		var netStatus resource.Resource

		netStatus, err = r.Get(ctx, resource.NewMetadata(network.NamespaceName, network.StatusType, network.StatusID, resource.VersionUndefined))
		if err != nil {
			if state.IsNotFoundError(err) {
				// no network state yet
				continue
			}

			return fmt.Errorf("error reading network status: %w", err)
		}

		if !netStatus.(*network.Status).TypedSpec().AddressReady {
			// wait for address
			continue
		}

		break
	}

	if ctrl.drainSub == nil {
		ctrl.drainSub = ctrl.Drainer.Subscribe()
	}

	destURL, err := url.Parse(*ctrl.Cmdline.Get(constants.KernelParamLoggingKernel).First())
	if err != nil {
		return fmt.Errorf("error parsing %q: %w", constants.KernelParamLoggingKernel, err)
	}

	sender := logging.NewJSONLines(destURL)
	defer sender.Close(ctx) //nolint:errcheck

	reader, err := kmsg.NewReader(kmsg.Follow())
	if err != nil {
		return fmt.Errorf("error reading kernel messages: %w", err)
	}

	defer reader.Close() //nolint:errcheck

	kmsgCh := reader.Scan(ctx)

	var (
		drainTimer   *time.Timer
		drainTimerCh <-chan time.Time
	)

	// release the drain timer (if one was started) on every exit path
	defer func() {
		if drainTimer != nil {
			drainTimer.Stop()
		}
	}()

	for {
		var (
			msg kmsg.Packet
			ok  bool
		)

		select {
		case <-ctx.Done():
			ctrl.drainSub.Cancel()

			return nil
		case msg, ok = <-kmsgCh:
			if !ok {
				// the channel was closed, so there will be no more messages;
				// a nil channel is never selected, which leaves termination to
				// context cancellation or the drain timer instead of
				// forwarding zero-valued packets
				kmsgCh = nil

				continue
			}

			if drainTimer != nil {
				// if draining, reset the timer as there's a new message
				if !drainTimer.Stop() {
					<-drainTimer.C
				}

				drainTimer.Reset(drainTimeout)
			}
		case <-ctrl.drainSub.EventCh():
			// drain started, assume that kmsg is drained if there're no new messages in drainTimeout
			drainTimer = time.NewTimer(drainTimeout)
			drainTimerCh = drainTimer.C

			// no kernel message was received in this iteration, so there is nothing to send
			continue
		case <-drainTimerCh:
			ctrl.drainSub.Cancel()

			return nil
		}

		if msg.Err != nil {
			return fmt.Errorf("error receiving kernel logs: %w", msg.Err)
		}

		if err = sender.Send(ctx, &runtime.LogEvent{
			Msg:   msg.Message.Message,
			Time:  msg.Message.Timestamp,
			Level: kmsgPriorityToLevel(msg.Message.Priority),
			Fields: map[string]interface{}{
				"facility": msg.Message.Facility.String(),
				"seq":      msg.Message.SequenceNumber,
				"clock":    msg.Message.Clock,
				"priority": msg.Message.Priority.String(),
			},
		}); err != nil {
			return fmt.Errorf("error sending logs: %w", err)
		}
	}
}
// kmsgPriorityToLevel maps a kernel message priority to a zap log level.
//
// Priorities not listed explicitly are reported at error level.
func kmsgPriorityToLevel(pri kmsg.Priority) zapcore.Level {
	var level zapcore.Level

	switch pri {
	case kmsg.Emerg, kmsg.Alert, kmsg.Crit, kmsg.Err:
		level = zapcore.ErrorLevel
	case kmsg.Warning:
		level = zapcore.WarnLevel
	case kmsg.Notice, kmsg.Info:
		level = zapcore.InfoLevel
	case kmsg.Debug:
		level = zapcore.DebugLevel
	default:
		level = zapcore.ErrorLevel
	}

	return level
}