fix: bump apid memory limit

Fixes #11046

Test up to the maximum gRPC message size as we support.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2025-05-20 22:12:13 +04:00
parent 5451f35b14
commit 0b99631a0b
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 87 additions and 10 deletions

View File

@ -7,15 +7,22 @@
package api
import (
"bytes"
"context"
"slices"
"testing"
"time"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/dustin/go-humanize"
"google.golang.org/grpc/codes"
"github.com/siderolabs/talos/internal/integration/base"
machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/config"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
)
@ -35,15 +42,7 @@ func (suite *ApidSuite) SuiteName() string {
// SetupTest ...
func (suite *ApidSuite) SetupTest() {
// make sure API calls have timeout
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 15*time.Second)
if suite.Cluster == nil {
suite.T().Skip("information about routable endpoints is not available")
}
if suite.APISuite.Endpoint != "" {
suite.T().Skip("test skipped as custom endpoint is set")
}
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), time.Minute)
}
// TearDownTest ...
@ -55,6 +54,14 @@ func (suite *ApidSuite) TearDownTest() {
// TestControlPlaneRouting verify access to all nodes via each control plane node as an endpoints.
func (suite *ApidSuite) TestControlPlaneRouting() {
if suite.Cluster == nil {
suite.T().Skip("information about routable endpoints is not available")
}
if suite.APISuite.Endpoint != "" {
suite.T().Skip("test skipped as custom endpoint is set")
}
endpoints := suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeControlPlane)
nodes := suite.DiscoverNodeInternalIPs(suite.ctx)
@ -97,6 +104,14 @@ func (suite *ApidSuite) TestControlPlaneRouting() {
// TestWorkerNoRouting verifies that worker nodes perform no routing.
func (suite *ApidSuite) TestWorkerNoRouting() {
if suite.Cluster == nil {
suite.T().Skip("information about routable endpoints is not available")
}
if suite.APISuite.Endpoint != "" {
suite.T().Skip("test skipped as custom endpoint is set")
}
endpoints := suite.DiscoverNodeInternalIPsByType(suite.ctx, machine.TypeWorker)
nodes := suite.DiscoverNodeInternalIPs(suite.ctx)
@ -155,6 +170,68 @@ func (suite *ApidSuite) TestWorkerNoRouting() {
}
}
// TestBigPayload verifies that big payloads are handled correctly.
func (suite *ApidSuite) TestBigPayload() {
if testing.Short() {
suite.T().Skip("skipping test in short mode")
}
node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
nodeCtx := client.WithNode(suite.ctx, node)
suite.T().Logf("testing big payload on node %s", node)
// we are going to simulate a big payload by making machine configuration big enough
cfg, err := safe.StateGetByID[*config.MachineConfig](nodeCtx, suite.Client.COSI, config.ActiveID)
suite.Require().NoError(err)
originalCfg, err := cfg.Container().Bytes()
suite.Require().NoError(err)
// the config is encoded twice in the resource gRPC message, so ensure that we can get to the one third of the size
const targetConfigSize = constants.GRPCMaxMessageSize / 3
suite.T().Logf("original config size: %d (%s), target size is %d (%s)",
len(originalCfg), humanize.Bytes(uint64(len(originalCfg))), targetConfigSize, humanize.Bytes(uint64(targetConfigSize)),
)
bytesToAdd := targetConfigSize - len(originalCfg)
if bytesToAdd <= 0 {
suite.T().Skip("configuration is already big enough")
}
const commentLine = "# this is a comment line added to make the config bigger and bigger and bigger and bigger all the way\n"
newConfig := slices.Concat(originalCfg, bytes.Repeat([]byte(commentLine), bytesToAdd/len(commentLine)+1))
suite.Assert().Greater(len(newConfig), targetConfigSize)
_, err = suite.Client.ApplyConfiguration(nodeCtx, &machineapi.ApplyConfigurationRequest{
Data: newConfig,
Mode: machineapi.ApplyConfigurationRequest_NO_REBOOT,
})
suite.Require().NoError(err)
// now get the machine configuration back several times
for range 5 {
cfg, err = safe.StateGetByID[*config.MachineConfig](nodeCtx, suite.Client.COSI, config.ActiveID)
suite.Require().NoError(err)
// check that the configuration is the same
newCfg, err := cfg.Container().Bytes()
suite.Require().NoError(err)
suite.Assert().Equal(newConfig, newCfg)
}
// revert the configuration
_, err = suite.Client.ApplyConfiguration(nodeCtx, &machineapi.ApplyConfigurationRequest{
Data: originalCfg,
Mode: machineapi.ApplyConfigurationRequest_NO_REBOOT,
})
suite.Require().NoError(err)
}
func init() {
allSuites = append(allSuites, new(ApidSuite))
}

View File

@ -796,7 +796,7 @@ const (
CgroupApidReservedMemory = 16 * 1024 * 1024
// CgroupApidMaxMemory is the hard memory limit for the apid process.
CgroupApidMaxMemory = 40 * 1024 * 1024
CgroupApidMaxMemory = 128 * 1024 * 1024
// CgroupApidMillicores is the CPU weight for the apid process.
CgroupApidMillicores = 500