mirror of
https://github.com/siderolabs/talos.git
synced 2025-08-30 19:11:13 +02:00
The most crucial changes are in `init/main.go`: on shutdown, Talos now tries to gracefully stop all the services. All the shutdown paths are unified, including poweroff, reboot and panic handling on startup. While I was at it, I also fixed a bug with containers failing to start when an old snapshot is still around. Service lifecycle is now wrapped with a `ServiceRunner` object which handles state transitions and captures events related to state changes. Every change goes to the log as well. There's no way to capture service state yet, but that is planned to be implemented as an RPC API for `init` which is exposed via `osd` to `osctl`. Future steps: 1. Implement service dependencies for correct startup order and shutdown order. 2. Implement service health, so that we can say "start trustd when containerd is up and healthy". 3. Implement gRPC API for init, expose via osd (service status, restart, poweroff, ...) 4. Implement 'String()' for conditions, so that we can see what a service is waiting on right now. Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
174 lines
3.7 KiB
Go
174 lines
3.7 KiB
Go
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
package restart
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/talos-systems/talos/internal/app/init/pkg/system/events"
|
|
"github.com/talos-systems/talos/internal/app/init/pkg/system/runner"
|
|
)
|
|
|
|
type restarter struct {
|
|
wrappedRunner runner.Runner
|
|
opts *Options
|
|
|
|
stop chan struct{}
|
|
stopped chan struct{}
|
|
}
|
|
|
|
// New wraps runner.Runner with restart policy
|
|
func New(wrapRunner runner.Runner, opts ...Option) runner.Runner {
|
|
r := &restarter{
|
|
wrappedRunner: wrapRunner,
|
|
opts: DefaultOptions(),
|
|
stop: make(chan struct{}),
|
|
stopped: make(chan struct{}),
|
|
}
|
|
|
|
for _, opt := range opts {
|
|
opt(r.opts)
|
|
}
|
|
|
|
return r
|
|
}
|
|
|
|
// Options is the functional options struct.
|
|
type Options struct {
|
|
// Type describes the service's restart policy.
|
|
Type Type
|
|
// RestartInterval is the interval between restarts for failed runs
|
|
RestartInterval time.Duration
|
|
}
|
|
|
|
// Option is the functional option func.
|
|
type Option func(*Options)
|
|
|
|
// Type represents the service's restart policy.
type Type int

const (
	// Forever will always restart a process.
	Forever Type = iota
	// Once will run process exactly once.
	Once
	// UntilSuccess will restart process until run succeeds.
	UntilSuccess
)

// String returns the name of the restart policy, or "Unknown" for a value
// that is not one of the declared policies.
func (t Type) String() string {
	names := [...]string{
		Forever:      "Forever",
		Once:         "Once",
		UntilSuccess: "UntilSuccess",
	}

	if t < 0 || int(t) >= len(names) {
		return "Unknown"
	}

	return names[t]
}
|
|
|
|
// DefaultOptions describes the default options to a runner.
|
|
func DefaultOptions() *Options {
|
|
return &Options{
|
|
Type: Forever,
|
|
RestartInterval: 5 * time.Second,
|
|
}
|
|
}
|
|
|
|
// WithType sets the type of a service.
|
|
func WithType(o Type) Option {
|
|
return func(args *Options) {
|
|
args.Type = o
|
|
}
|
|
}
|
|
|
|
// WithRestartInterval sets the interval between restarts of the failed task
|
|
func WithRestartInterval(interval time.Duration) Option {
|
|
return func(args *Options) {
|
|
args.RestartInterval = interval
|
|
}
|
|
}
|
|
|
|
// Open implements the Runner interface
|
|
func (r *restarter) Open(ctx context.Context) error {
|
|
return r.wrappedRunner.Open(ctx)
|
|
}
|
|
|
|
// Run implements the Runner interface
|
|
// nolint: gocyclo
|
|
func (r *restarter) Run(eventSink events.Recorder) error {
|
|
defer close(r.stopped)
|
|
|
|
for {
|
|
errCh := make(chan error)
|
|
|
|
go func() {
|
|
errCh <- r.wrappedRunner.Run(eventSink)
|
|
}()
|
|
|
|
var err error
|
|
|
|
select {
|
|
case <-r.stop:
|
|
// nolint: errcheck
|
|
_ = r.wrappedRunner.Stop()
|
|
|
|
return <-errCh
|
|
case err = <-errCh:
|
|
}
|
|
|
|
errStop := r.wrappedRunner.Stop()
|
|
if errStop != nil {
|
|
return errStop
|
|
}
|
|
|
|
switch r.opts.Type {
|
|
case Once:
|
|
return err
|
|
case UntilSuccess:
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
eventSink(events.StateWaiting, "Error running %s, going to restart until it succeeds: %s", r.wrappedRunner, err)
|
|
case Forever:
|
|
if err == nil {
|
|
eventSink(events.StateWaiting, "Runner %s exited without error, going to restart it", r.wrappedRunner)
|
|
} else {
|
|
eventSink(events.StateWaiting, "Error running %v, going to restart forever: %s", r.wrappedRunner, err)
|
|
}
|
|
}
|
|
|
|
select {
|
|
case <-r.stop:
|
|
eventSink(events.StateStopping, "Aborting restart sequence")
|
|
return nil
|
|
case <-time.After(r.opts.RestartInterval):
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop implements the Runner interface
|
|
func (r *restarter) Stop() error {
|
|
close(r.stop)
|
|
|
|
<-r.stopped
|
|
|
|
return nil
|
|
}
|
|
|
|
// Close implements the Runner interface
|
|
func (r *restarter) Close() error {
|
|
return r.wrappedRunner.Close()
|
|
}
|
|
|
|
// String implements the Runner interface
|
|
func (r *restarter) String() string {
|
|
return fmt.Sprintf("Restart(%s, %s)", r.opts.Type, r.wrappedRunner)
|
|
}
|