From c735d149284a3012034cd06108443f12ac331507 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 27 Nov 2024 20:33:03 +0400 Subject: [PATCH] fix: wait for udevd before starting sync Fixes #8074 One part of the fix is to wait for udevd to be ready, as anyways before udevd is ready network interfaces are not ready, so sync is not possible. Second part is that now u-root's rtc package supports closing rtc devices, so we can properly open/close it as part of the sync loop (vs. previous workaround with sync.Once). Signed-off-by: Andrey Smirnov --- .../app/machined/pkg/controllers/time/sync.go | 34 ++++++++++++------- .../pkg/controllers/time/sync_test.go | 6 ++++ internal/pkg/ntp/ntp.go | 22 +++++++----- internal/pkg/ntp/ntp_test.go | 13 ++++--- internal/pkg/ntp/rtc.go | 17 ---------- 5 files changed, 49 insertions(+), 43 deletions(-) delete mode 100644 internal/pkg/ntp/rtc.go diff --git a/internal/app/machined/pkg/controllers/time/sync.go b/internal/app/machined/pkg/controllers/time/sync.go index d65d90b6b..250a1e9cd 100644 --- a/internal/app/machined/pkg/controllers/time/sync.go +++ b/internal/app/machined/pkg/controllers/time/sync.go @@ -17,6 +17,7 @@ import ( "github.com/siderolabs/gen/optional" "go.uber.org/zap" + "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/runtime" v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/internal/pkg/ntp" "github.com/siderolabs/talos/pkg/machinery/resources/config" @@ -39,19 +40,7 @@ func (ctrl *SyncController) Name() string { // Inputs implements controller.Controller interface. func (ctrl *SyncController) Inputs() []controller.Input { - return []controller.Input{ - { - Namespace: network.NamespaceName, - Type: network.TimeServerStatusType, - ID: optional.Some(network.TimeServerID), - Kind: controller.InputWeak, - }, - { - Namespace: config.NamespaceName, - Type: config.MachineConfigType, - ID: optional.Some(config.V1Alpha1ID), - }, - } + return nil } // Outputs implements controller.Controller interface. @@ -89,6 +78,25 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge } } + // wait for udevd to be healthy, which implies that all RTC devices + if err := runtime.WaitForDevicesReady(ctx, r, + []controller.Input{ + { + Namespace: network.NamespaceName, + Type: network.TimeServerStatusType, + ID: optional.Some(network.TimeServerID), + Kind: controller.InputWeak, + }, + { + Namespace: config.NamespaceName, + Type: config.MachineConfigType, + ID: optional.Some(config.V1Alpha1ID), + }, + }, + ); err != nil { + return err + } + var ( syncCtx context.Context syncCtxCancel context.CancelFunc diff --git a/internal/app/machined/pkg/controllers/time/sync_test.go b/internal/app/machined/pkg/controllers/time/sync_test.go index 4998892e0..b901ff1b6 100644 --- a/internal/app/machined/pkg/controllers/time/sync_test.go +++ b/internal/app/machined/pkg/controllers/time/sync_test.go @@ -30,6 +30,7 @@ import ( "github.com/siderolabs/talos/pkg/machinery/constants" "github.com/siderolabs/talos/pkg/machinery/resources/config" "github.com/siderolabs/talos/pkg/machinery/resources/network" + runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime" timeresource "github.com/siderolabs/talos/pkg/machinery/resources/time" v1alpha1resource "github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1" ) @@ -62,6 +63,11 @@ func (suite *SyncSuite) SetupTest() { suite.runtime, err = runtime.NewRuntime(suite.state, zaptest.NewLogger(suite.T())) suite.Require().NoError(err) + + // create fake device ready status + deviceStatus := runtimeres.NewDevicesStatus(runtimeres.NamespaceName, runtimeres.DevicesID) + deviceStatus.TypedSpec().Ready = true + suite.Require().NoError(suite.state.Create(suite.ctx, deviceStatus)) } func (suite *SyncSuite) startRuntime() { diff --git a/internal/pkg/ntp/ntp.go b/internal/pkg/ntp/ntp.go index 05dac14dd..6dddb499c 100644 --- a/internal/pkg/ntp/ntp.go +++ b/internal/pkg/ntp/ntp.go @@ -51,6 +51,7 @@ type Syncer struct { CurrentTime CurrentTimeFunc NTPQuery QueryFunc AdjustTime AdjustTimeFunc + DisableRTC bool } // Measurement is a struct containing correction data based on a time request. @@ -159,14 +160,19 @@ func (syncer *Syncer) isSpike(resp *ntp.Response) bool { // //nolint:gocyclo,cyclop func (syncer *Syncer) Run(ctx context.Context) { - RTCClockInitialize.Do(func() { - var err error + var ( + rtcClock *rtc.RTC + err error + ) - RTCClock, err = rtc.OpenRTC() + if !syncer.DisableRTC { + rtcClock, err = rtc.OpenRTC() if err != nil { syncer.logger.Error("failure opening RTC, ignored", zap.Error(err)) + } else { + defer rtcClock.Close() //nolint:errcheck } - }) + } pollInterval := time.Duration(0) @@ -216,7 +222,7 @@ func (syncer *Syncer) Run(ctx context.Context) { ) if resp != nil && !spike { - err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval) + err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval, rtcClock) if err == nil { if !syncer.timeSyncNotified { @@ -415,7 +421,7 @@ func log2i(v uint64) int { // adjustTime adds an offset to the current time. // //nolint:gocyclo -func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndicator, server string, nextPollInterval time.Duration) error { +func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndicator, server string, nextPollInterval time.Duration, rtcClock *rtc.RTC) error { var ( buf bytes.Buffer req unix.Timex @@ -509,8 +515,8 @@ func (syncer *Syncer) adjustTime(offset time.Duration, leapSecond ntp.LeapIndica } if jump { - if RTCClock != nil { - if rtcErr := RTCClock.Set(time.Now().Add(offset)); rtcErr != nil { + if rtcClock != nil { + if rtcErr := rtcClock.Set(time.Now().Add(offset)); rtcErr != nil { syncer.logger.Error("error syncing RTC", zap.Error(rtcErr)) } else { syncer.logger.Info("synchronized RTC with system clock") diff --git a/internal/pkg/ntp/ntp_test.go b/internal/pkg/ntp/ntp_test.go index f2cc0ce17..e2bfe503d 100644 --- a/internal/pkg/ntp/ntp_test.go +++ b/internal/pkg/ntp/ntp_test.go @@ -40,11 +40,6 @@ func TestNTPSuite(t *testing.T) { suite.Run(t, new(NTPSuite)) } -func (suite *NTPSuite) SetupSuite() { - // disable RTC clock - ntp.RTCClockInitialize.Do(func() {}) -} - func (suite *NTPSuite) SetupTest() { suite.systemClock = time.Now().UTC() suite.clockAdjustments = nil @@ -194,6 +189,7 @@ func (suite *NTPSuite) TestSync() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock + syncer.DisableRTC = true ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -225,6 +221,7 @@ func (suite *NTPSuite) TestSyncContinuous() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true syncer.MinPoll = time.Second syncer.MaxPoll = time.Second @@ -273,6 +270,7 @@ func (suite *NTPSuite) TestSyncKissOfDeath() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true syncer.MinPoll = time.Second syncer.MaxPoll = time.Second @@ -326,6 +324,7 @@ func (suite *NTPSuite) TestSyncWithSpikes() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true syncer.MinPoll = time.Second syncer.MaxPoll = time.Second @@ -377,6 +376,7 @@ func (suite *NTPSuite) TestSyncChangeTimeservers() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock + syncer.DisableRTC = true ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -416,6 +416,7 @@ func (suite *NTPSuite) TestSyncIterateTimeservers() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true syncer.MinPoll = time.Second syncer.MaxPoll = time.Second @@ -468,6 +469,7 @@ func (suite *NTPSuite) TestSyncEpochChange() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -507,6 +509,7 @@ func (suite *NTPSuite) TestSyncSwitchTimeservers() { syncer.AdjustTime = suite.adjustSystemClock syncer.CurrentTime = suite.getSystemClock syncer.NTPQuery = suite.fakeQuery + syncer.DisableRTC = true syncer.MinPoll = time.Second syncer.MaxPoll = time.Second diff --git a/internal/pkg/ntp/rtc.go b/internal/pkg/ntp/rtc.go deleted file mode 100644 index a6b0a5838..000000000 --- a/internal/pkg/ntp/rtc.go +++ /dev/null @@ -1,17 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -package ntp - -import ( - "sync" - - "github.com/u-root/u-root/pkg/rtc" -) - -// Global instance of RTC clock because `rtc` doesn't support closing. -var ( - RTCClock *rtc.RTC - RTCClockInitialize sync.Once -)