fix: one more attempt to fix volume mount race on restart

The issue seems to be that service restarts are still racy, which can
lead to a hang on a conflicting state.

By not re-using the mount request IDs across restarts (each run now
appends its own generation number to the ID), hopefully we can avoid
the hang.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2025-08-01 20:48:44 +04:00
parent 34d37a268a
commit 8d12db480c
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 12 additions and 7 deletions

View File

@ -11,6 +11,7 @@ import (
"log" "log"
"slices" "slices"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/siderolabs/gen/xslices" "github.com/siderolabs/gen/xslices"
@ -37,6 +38,7 @@ type ServiceRunner struct {
service Service service Service
id string id string
instance *singleton instance *singleton
generation atomic.Int64
state events.ServiceState state events.ServiceState
events events.ServiceEvents events events.ServiceEvents
@ -200,6 +202,8 @@ func (svcrunner *ServiceRunner) Run(notifyChannels ...chan<- struct{}) error {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
generation := svcrunner.generation.Add(1)
go func() { go func() {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -229,7 +233,7 @@ func (svcrunner *ServiceRunner) Run(notifyChannels ...chan<- struct{}) error {
volumeRequests := make([]volumeRequest, 0, len(volumeIDs)) volumeRequests := make([]volumeRequest, 0, len(volumeIDs))
for _, volumeID := range volumeIDs { for _, volumeID := range volumeIDs {
requestID, err := svcrunner.createVolumeMountRequest(ctx, volumeID) requestID, err := svcrunner.createVolumeMountRequest(ctx, volumeID, generation)
if err != nil { if err != nil {
return err return err
} }

View File

@ -7,6 +7,7 @@ package system
import ( import (
"context" "context"
"fmt" "fmt"
"strconv"
"github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state" "github.com/cosi-project/runtime/pkg/state"
@ -15,10 +16,10 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/block" "github.com/siderolabs/talos/pkg/machinery/resources/block"
) )
func (svcrunner *ServiceRunner) createVolumeMountRequest(ctx context.Context, volumeID string) (string, error) { func (svcrunner *ServiceRunner) createVolumeMountRequest(ctx context.Context, volumeID string, generation int64) (string, error) {
st := svcrunner.runtime.State().V1Alpha2().Resources() st := svcrunner.runtime.State().V1Alpha2().Resources()
requester := "service/" + svcrunner.id requester := "service/" + svcrunner.id
requestID := requester + "-" + volumeID requestID := requester + "-" + volumeID + "-" + strconv.FormatInt(generation, 10)
mountRequest := block.NewVolumeMountRequest(block.NamespaceName, requestID) mountRequest := block.NewVolumeMountRequest(block.NamespaceName, requestID)
mountRequest.TypedSpec().Requester = requester mountRequest.TypedSpec().Requester = requester