fix: retry blockdevice open in the installer

We had these retries in other places, but not here.

This seems to happen more frequently with the Linux 6.6 update; the tl;dr is
the same: `udevd` tries to rescan the partition table at the wrong moment,
preventing the Talos installer from opening the partition which was just created.

It's a race, so work around it by retrying the call.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2024-01-31 22:17:20 +04:00
parent 593afeea38
commit a5e13c696d
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811

View File

@ -9,9 +9,11 @@ import (
"fmt"
"log"
"os"
"time"
"github.com/siderolabs/go-blockdevice/blockdevice"
"github.com/siderolabs/go-procfs/procfs"
"github.com/siderolabs/go-retry/retry"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/board"
@ -229,7 +231,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) {
var bd *blockdevice.BlockDevice
bd, err = blockdevice.Open(device)
bd, err = retryBlockdeviceOpen(device)
if err != nil {
return err
}
@ -368,3 +370,20 @@ func (i *Installer) runPreflightChecks(mode Mode) error {
return checks.Run(ctx)
}
// retryBlockdeviceOpen opens the block device at the given path, retrying
// while the open fails with a "does not exist" error.
//
// This works around a race with udevd, which may rescan the partition table
// right after a partition is created and make the device node briefly
// unavailable. Any other error is returned as-is and is not marked as an
// expected (retryable) error.
//
// The retrier is configured with 10s and 100ms units; presumably that is the
// overall timeout and the pause between attempts — confirm against go-retry.
func retryBlockdeviceOpen(device string) (*blockdevice.BlockDevice, error) {
	var opened *blockdevice.BlockDevice

	// attempt performs a single open and classifies its outcome for the retrier.
	attempt := func() error {
		dev, err := blockdevice.Open(device)
		// Always record the result of the latest attempt, successful or not.
		opened = dev

		switch {
		case err == nil:
			return nil
		case os.IsNotExist(err):
			// The device node is missing for now: mark the error as expected
			// so the retrier tries again.
			return retry.ExpectedError(err)
		default:
			return err
		}
	}

	retrier := retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond))
	err := retrier.Retry(attempt)

	return opened, err
}