fix: wait for udevd before starting sync

Fixes #8074

One part of the fix is to wait for udevd to be ready, as anyways before
udevd is ready network interfaces are not ready, so sync is not
possible.

Second part is that now u-root's rtc package supports closing rtc
devices, so we can properly open/close it as part of the sync loop (vs.
previous workaround with sync.Once).

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2024-11-27 20:33:03 +04:00
parent bef4d51508
commit c735d14928
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
5 changed files with 49 additions and 43 deletions

View File

@ -17,6 +17,7 @@ import (
"github.com/siderolabs/gen/optional"
"go.uber.org/zap"
"github.com/siderolabs/talos/internal/app/machined/pkg/controllers/runtime"
v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/pkg/ntp"
"github.com/siderolabs/talos/pkg/machinery/resources/config"
@ -39,19 +40,7 @@ func (ctrl *SyncController) Name() string {
// Inputs implements controller.Controller interface.
func (ctrl *SyncController) Inputs() []controller.Input {
return []controller.Input{
{
Namespace: network.NamespaceName,
Type: network.TimeServerStatusType,
ID: optional.Some(network.TimeServerID),
Kind: controller.InputWeak,
},
{
Namespace: config.NamespaceName,
Type: config.MachineConfigType,
ID: optional.Some(config.V1Alpha1ID),
},
}
return nil
}
// Outputs implements controller.Controller interface.
@ -89,6 +78,25 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge
}
}
// wait for udevd to be healthy, which implies that all RTC devices
if err := runtime.WaitForDevicesReady(ctx, r,
[]controller.Input{
{
Namespace: network.NamespaceName,
Type: network.TimeServerStatusType,
ID: optional.Some(network.TimeServerID),
Kind: controller.InputWeak,
},
{
Namespace: config.NamespaceName,
Type: config.MachineConfigType,
ID: optional.Some(config.V1Alpha1ID),
},
},
); err != nil {
return err
}
var (
syncCtx context.Context
syncCtxCancel context.CancelFunc

View File

@ -30,6 +30,7 @@ import (
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/config"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
timeresource "github.com/siderolabs/talos/pkg/machinery/resources/time"
v1alpha1resource "github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
)
@ -62,6 +63,11 @@ func (suite *SyncSuite) SetupTest() {
suite.runtime, err = runtime.NewRuntime(suite.state, zaptest.NewLogger(suite.T()))
suite.Require().NoError(err)
// create fake device ready status
deviceStatus := runtimeres.NewDevicesStatus(runtimeres.NamespaceName, runtimeres.DevicesID)
deviceStatus.TypedSpec().Ready = true
suite.Require().NoError(suite.state.Create(suite.ctx, deviceStatus))
}
func (suite *SyncSuite) startRuntime() {

View File

@ -51,6 +51,7 @@ type Syncer struct {
CurrentTime CurrentTimeFunc
NTPQuery QueryFunc
AdjustTime AdjustTimeFunc
DisableRTC bool
}
// Measurement is a struct containing correction data based on a time request.
@ -159,14 +160,19 @@ func (syncer *Syncer) isSpike(resp *ntp.Response) bool {
//
//nolint:gocyclo,cyclop
func (syncer *Syncer) Run(ctx context.Context) {
RTCClockInitialize.Do(func() {
var err error
var (
rtcClock *rtc.RTC
err error
)
RTCClock, err = rtc.OpenRTC()
if !syncer.DisableRTC {
rtcClock, err = rtc.OpenRTC()
if err != nil {
syncer.logger.Error("failure opening RTC, ignored", zap.Error(err))
} else {
defer rtcClock.Close() //nolint:errcheck
}
})
}
pollInterval := time.Duration(0)
@ -216,7 +222,7 @@ func (syncer *Syncer) Run(ctx context.Context) {
)
if resp != nil && !spike {
err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval)
err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval, rtcClock)
if err == nil {
if !syncer.timeSyncNotified {
@ -415,7 +421,7 @@ func log2i(v uint64) int {
// adjustTime adds an offset to the current time.
//
//nolint:gocyclo
func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndicator, server string, nextPollInterval time.Duration) error {
func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndicator, server string, nextPollInterval time.Duration, rtcClock *rtc.RTC) error {
var (
buf bytes.Buffer
req unix.Timex
@ -509,8 +515,8 @@ func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndica
}
if jump {
if RTCClock != nil {
if rtcErr := RTCClock.Set(time.Now().Add(offset)); rtcErr != nil {
if rtcClock != nil {
if rtcErr := rtcClock.Set(time.Now().Add(offset)); rtcErr != nil {
syncer.logger.Error("error syncing RTC", zap.Error(rtcErr))
} else {
syncer.logger.Info("synchronized RTC with system clock")

View File

@ -40,11 +40,6 @@ func TestNTPSuite(t *testing.T) {
suite.Run(t, new(NTPSuite))
}
func (suite *NTPSuite) SetupSuite() {
// disable RTC clock
ntp.RTCClockInitialize.Do(func() {})
}
func (suite *NTPSuite) SetupTest() {
suite.systemClock = time.Now().UTC()
suite.clockAdjustments = nil
@ -194,6 +189,7 @@ func (suite *NTPSuite) TestSync() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.DisableRTC = true
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -225,6 +221,7 @@ func (suite *NTPSuite) TestSyncContinuous() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
syncer.MinPoll = time.Second
syncer.MaxPoll = time.Second
@ -273,6 +270,7 @@ func (suite *NTPSuite) TestSyncKissOfDeath() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
syncer.MinPoll = time.Second
syncer.MaxPoll = time.Second
@ -326,6 +324,7 @@ func (suite *NTPSuite) TestSyncWithSpikes() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
syncer.MinPoll = time.Second
syncer.MaxPoll = time.Second
@ -377,6 +376,7 @@ func (suite *NTPSuite) TestSyncChangeTimeservers() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.DisableRTC = true
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -416,6 +416,7 @@ func (suite *NTPSuite) TestSyncIterateTimeservers() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
syncer.MinPoll = time.Second
syncer.MaxPoll = time.Second
@ -468,6 +469,7 @@ func (suite *NTPSuite) TestSyncEpochChange() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -507,6 +509,7 @@ func (suite *NTPSuite) TestSyncSwitchTimeservers() {
syncer.AdjustTime = suite.adjustSystemClock
syncer.CurrentTime = suite.getSystemClock
syncer.NTPQuery = suite.fakeQuery
syncer.DisableRTC = true
syncer.MinPoll = time.Second
syncer.MaxPoll = time.Second

View File

@ -1,17 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package ntp
import (
"sync"
"github.com/u-root/u-root/pkg/rtc"
)
// Global instance of RTC clock because `rtc` doesn't support closing.
var (
RTCClock *rtc.RTC
RTCClockInitialize sync.Once
)