fix: one more attempt to fix volume mount race on restart

The issue appears to stem from service restarts that are still racy,
which can lead to a hang on a conflicting state.

By not re-using the mount request IDs across restarts (each run now gets a
fresh ID), we can hopefully avoid the hang.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2025-08-01 20:48:44 +04:00
parent 34d37a268a
commit 8d12db480c
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 12 additions and 7 deletions

View File

@ -11,6 +11,7 @@ import (
"log"
"slices"
"sync"
"sync/atomic"
"time"
"github.com/siderolabs/gen/xslices"
@ -33,10 +34,11 @@ var WaitConditionCheckInterval = time.Second
type ServiceRunner struct {
mu sync.Mutex
runtime runtime.Runtime
service Service
id string
instance *singleton
runtime runtime.Runtime
service Service
id string
instance *singleton
generation atomic.Int64
state events.ServiceState
events events.ServiceEvents
@ -200,6 +202,8 @@ func (svcrunner *ServiceRunner) Run(notifyChannels ...chan<- struct{}) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
generation := svcrunner.generation.Add(1)
go func() {
select {
case <-ctx.Done():
@ -229,7 +233,7 @@ func (svcrunner *ServiceRunner) Run(notifyChannels ...chan<- struct{}) error {
volumeRequests := make([]volumeRequest, 0, len(volumeIDs))
for _, volumeID := range volumeIDs {
requestID, err := svcrunner.createVolumeMountRequest(ctx, volumeID)
requestID, err := svcrunner.createVolumeMountRequest(ctx, volumeID, generation)
if err != nil {
return err
}

View File

@ -7,6 +7,7 @@ package system
import (
"context"
"fmt"
"strconv"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
@ -15,10 +16,10 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/block"
)
func (svcrunner *ServiceRunner) createVolumeMountRequest(ctx context.Context, volumeID string) (string, error) {
func (svcrunner *ServiceRunner) createVolumeMountRequest(ctx context.Context, volumeID string, generation int64) (string, error) {
st := svcrunner.runtime.State().V1Alpha2().Resources()
requester := "service/" + svcrunner.id
requestID := requester + "-" + volumeID
requestID := requester + "-" + volumeID + "-" + strconv.FormatInt(generation, 10)
mountRequest := block.NewVolumeMountRequest(block.NamespaceName, requestID)
mountRequest.TypedSpec().Requester = requester