mirror of
https://github.com/siderolabs/omni.git
synced 2026-04-01 22:21:04 +02:00
The gotextdiff/myers library uses the naive Myers algorithm variant that stores the full edit trace, resulting in O((M+N)^2) space complexity. For machine configs with large inline K8s manifests (thousands of lines), this causes massive memory spikes — e.g., 80K lines allocates ~98 GB and gets OOM-killed. Replace it with neticdk/go-stdlib/diff/myers which implements the linear-space Myers variant (divide-and-conquer). Memory usage drops from ~25 GB to ~8 MB for 40K-line inputs. The diff output format is unchanged (unified diff with @@ hunks). Co-authored-by: Artem Chernyshev <artem.chernyshev@talos-systems.com> Co-authored-by: Oguz Kilcan <oguz.kilcan@siderolabs.com> Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
406 lines
11 KiB
Go
406 lines
11 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
package diff_test
|
|
|
|
import (
|
|
"bytes"
|
|
_ "embed"
|
|
"fmt"
|
|
"runtime"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/siderolabs/talos/pkg/machinery/config/configloader"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/container"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/encoder"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/siderolabs/omni/client/pkg/diff"
|
|
)
|
|
|
|
// BenchmarkComputeDiff tests various state transitions of the diff logic.
|
|
func BenchmarkComputeDiff(b *testing.B) {
|
|
modifiedConfigBytes := modifyConfig(b, baseConfigBytes, func(c *v1alpha1.Config) {
|
|
c.MachineConfig.MachineFiles = append(c.MachineConfig.MachineFiles,
|
|
&v1alpha1.MachineFile{
|
|
FileContent: "aaa",
|
|
FilePermissions: 0o777,
|
|
FilePath: "/var/f",
|
|
FileOp: "create",
|
|
},
|
|
)
|
|
})
|
|
|
|
installChangeConfigBytes := modifyConfig(b, baseConfigBytes, func(c *v1alpha1.Config) {
|
|
c.MachineConfig.MachineInstall.InstallDisk = "/dev/sdb"
|
|
})
|
|
|
|
b.ResetTimer()
|
|
|
|
b.Run("EmptyToEmpty", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(nil, nil) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("EmptyToPopulated", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(nil, baseConfigBytes) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("PopulatedToEmpty", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(baseConfigBytes, nil) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("Identical", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(baseConfigBytes, baseConfigBytes) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("RealDiff", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(baseConfigBytes, modifiedConfigBytes) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("InstallSectionIgnored", func(b *testing.B) {
|
|
for range b.N {
|
|
diff.Compute(baseConfigBytes, installChangeConfigBytes) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
largeYAML := generateLargeYAML(10000)
|
|
|
|
b.Run("EmptyToLargeYAML", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
|
|
for range b.N {
|
|
diff.Compute(nil, largeYAML) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
b.Run("LargeYAMLToEmpty", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
|
|
for range b.N {
|
|
diff.Compute(largeYAML, nil) //nolint:errcheck
|
|
}
|
|
})
|
|
|
|
largeYAMLModified := generateLargeYAML(10001)
|
|
|
|
b.Run("LargeYAMLToDifferentLargeYAML", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
|
|
for range b.N {
|
|
diff.Compute(largeYAML, largeYAMLModified) //nolint:errcheck
|
|
}
|
|
})
|
|
}
|
|
|
|
func modifyConfig(t testing.TB, data []byte, update func(*v1alpha1.Config)) []byte {
|
|
cfg, err := configloader.NewFromBytes(data)
|
|
require.NoError(t, err)
|
|
|
|
c, err := container.New(cfg.Documents()...)
|
|
require.NoError(t, err)
|
|
|
|
v1Cfg := c.RawV1Alpha1()
|
|
require.NotNil(t, v1Cfg)
|
|
|
|
update(v1Cfg)
|
|
|
|
newData, err := c.EncodeBytes(encoder.WithComments(encoder.CommentsDisabled))
|
|
require.NoError(t, err)
|
|
|
|
return newData
|
|
}
|
|
|
|
//go:embed testdata/base-config.yaml
|
|
var baseConfigBytes []byte
|
|
|
|
//go:embed testdata/empty-to-populated.diff
|
|
var emptyToPopulatedDiff string
|
|
|
|
//go:embed testdata/populated-to-empty.diff
|
|
var populatedToEmptyDiff string
|
|
|
|
// TestComputeDiff tests the ComputeDiff function with various state transitions.
|
|
func TestComputeDiff(t *testing.T) {
|
|
modifiedConfigBytes := modifyConfig(t, baseConfigBytes, func(c *v1alpha1.Config) {
|
|
c.MachineConfig.MachineFiles = append(c.MachineConfig.MachineFiles,
|
|
&v1alpha1.MachineFile{
|
|
FileContent: "aaa",
|
|
FilePermissions: 0o777,
|
|
FilePath: "/var/f",
|
|
FileOp: "create",
|
|
},
|
|
)
|
|
})
|
|
|
|
tests := []struct {
|
|
name string
|
|
wantDiff string
|
|
previousData []byte
|
|
newData []byte
|
|
wantErr bool
|
|
}{
|
|
{
|
|
name: "empty to empty",
|
|
previousData: nil,
|
|
newData: nil,
|
|
},
|
|
{
|
|
name: "empty to populated",
|
|
previousData: nil,
|
|
newData: baseConfigBytes,
|
|
wantDiff: emptyToPopulatedDiff,
|
|
},
|
|
{
|
|
name: "populated to empty",
|
|
previousData: baseConfigBytes,
|
|
newData: nil,
|
|
wantDiff: populatedToEmptyDiff,
|
|
},
|
|
{
|
|
name: "identical",
|
|
previousData: baseConfigBytes,
|
|
newData: baseConfigBytes,
|
|
wantDiff: "",
|
|
},
|
|
{
|
|
name: "real diff",
|
|
previousData: baseConfigBytes,
|
|
newData: modifiedConfigBytes,
|
|
wantDiff: `@@ -15,6 +15,11 @@
|
|
install:
|
|
wipe: false
|
|
grubUseUKICmdline: true
|
|
+ files:
|
|
+ - content: aaa
|
|
+ permissions: 0o777
|
|
+ path: /var/f
|
|
+ op: create
|
|
features:
|
|
diskQuotaSupport: true
|
|
kubePrism:
|
|
`,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
diff, err := diff.Compute(tt.previousData, tt.newData)
|
|
|
|
if tt.wantErr {
|
|
require.Error(t, err)
|
|
|
|
return
|
|
}
|
|
|
|
require.NoError(t, err)
|
|
require.Equal(t, tt.wantDiff, diff)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestComputeDiff_LargeInput(t *testing.T) {
|
|
t.Run("structural diff for inputs under threshold", func(t *testing.T) {
|
|
small := generateLargeYAML(1000) // ~8000 lines, well under 50k
|
|
|
|
diff, err := diff.Compute(nil, small)
|
|
require.NoError(t, err)
|
|
assert.NotEmpty(t, diff)
|
|
assert.True(t, strings.HasPrefix(diff, "@@ -0,0 +1,"))
|
|
})
|
|
|
|
t.Run("bulk diff for inputs over threshold", func(t *testing.T) {
|
|
largeYAML := generateLargeYAML(10000) // ~80k lines, over 50k threshold
|
|
|
|
diff, err := diff.Compute(nil, largeYAML)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, diff, "diff too large to display")
|
|
})
|
|
|
|
t.Run("bulk diff large to empty", func(t *testing.T) {
|
|
largeYAML := generateLargeYAML(10000)
|
|
|
|
diff, err := diff.Compute(largeYAML, nil)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, diff, "diff too large to display")
|
|
})
|
|
|
|
t.Run("bulk diff large to different large", func(t *testing.T) {
|
|
largeYAML := generateLargeYAML(10000)
|
|
largeYAMLModified := generateLargeYAML(10001)
|
|
|
|
diff, err := diff.Compute(largeYAML, largeYAMLModified)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, diff, "diff too large to display")
|
|
})
|
|
}
|
|
|
|
// TestComputeDiff_MemoryBudget asserts that diff.Compute stays within a memory
|
|
// budget across a range of input sizes and patterns.
|
|
func TestComputeDiff_MemoryBudget(t *testing.T) {
|
|
const memoryBudget = 50 * 1024 * 1024 // 50 MB hard ceiling
|
|
|
|
// generateLines builds a YAML-like blob with exactly n newline-terminated lines.
|
|
// When variant > 0 every variant-th line is changed so the diff is non-trivial.
|
|
generateLines := func(n, variant int) []byte {
|
|
var sb strings.Builder
|
|
|
|
for i := range n {
|
|
if variant > 0 && i%variant == 0 {
|
|
fmt.Fprintf(&sb, "line-%d-variant-%d\n", i, variant)
|
|
} else {
|
|
fmt.Fprintf(&sb, "line-%d\n", i)
|
|
}
|
|
}
|
|
|
|
return []byte(sb.String())
|
|
}
|
|
|
|
tests := []struct {
|
|
old func() []byte
|
|
new func() []byte
|
|
name string
|
|
maxMemBytes uint64
|
|
}{
|
|
{
|
|
name: "small similar configs (100 lines, few changes)",
|
|
old: func() []byte { return generateLines(100, 0) },
|
|
new: func() []byte { return generateLines(100, 10) },
|
|
maxMemBytes: 1 * 1024 * 1024, // 1 MB
|
|
},
|
|
{
|
|
name: "small completely different (100 lines)",
|
|
old: func() []byte { return generateLines(100, 0) },
|
|
new: func() []byte { return generateLines(100, 1) },
|
|
maxMemBytes: 1 * 1024 * 1024,
|
|
},
|
|
{
|
|
name: "medium similar (5K lines, few changes)",
|
|
old: func() []byte { return generateLines(5000, 0) },
|
|
new: func() []byte { return generateLines(5000, 50) },
|
|
maxMemBytes: 10 * 1024 * 1024, // 10 MB
|
|
},
|
|
{
|
|
name: "medium completely different (5K lines)",
|
|
old: func() []byte { return generateLines(5000, 0) },
|
|
new: func() []byte { return generateLines(5000, 1) },
|
|
maxMemBytes: 10 * 1024 * 1024,
|
|
},
|
|
{
|
|
name: "large similar (30K lines, few changes)",
|
|
old: func() []byte { return generateLines(30000, 0) },
|
|
new: func() []byte { return generateLines(30000, 100) },
|
|
maxMemBytes: 50 * 1024 * 1024, // 50 MB
|
|
},
|
|
{
|
|
name: "large completely different (30K lines)",
|
|
old: func() []byte { return generateLines(30000, 0) },
|
|
new: func() []byte { return generateLines(30000, 1) },
|
|
maxMemBytes: 100 * 1024 * 1024, // 100 MB
|
|
},
|
|
{
|
|
name: "asymmetric: empty to 30K lines",
|
|
old: func() []byte { return nil },
|
|
new: func() []byte { return generateLines(30000, 0) },
|
|
maxMemBytes: 50 * 1024 * 1024,
|
|
},
|
|
{
|
|
name: "asymmetric: 50 lines to 30K lines",
|
|
old: func() []byte { return generateLines(50, 0) },
|
|
new: func() []byte { return generateLines(30000, 0) },
|
|
maxMemBytes: 50 * 1024 * 1024,
|
|
},
|
|
{
|
|
name: "near threshold (total ~74K lines)",
|
|
old: func() []byte { return generateLines(37000, 0) },
|
|
new: func() []byte { return generateLines(37000, 1) },
|
|
maxMemBytes: memoryBudget,
|
|
},
|
|
{
|
|
name: "over threshold (total ~76K lines, returns summary)",
|
|
old: func() []byte { return generateLines(38000, 0) },
|
|
new: func() []byte { return generateLines(38000, 1) },
|
|
maxMemBytes: 1 * 1024 * 1024, // summary path allocates almost nothing
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
oldData := tt.old()
|
|
newData := tt.new()
|
|
|
|
oldLines := bytes.Count(oldData, []byte("\n"))
|
|
newLines := bytes.Count(newData, []byte("\n"))
|
|
|
|
// Measure allocations.
|
|
runtime.GC()
|
|
|
|
var before, after runtime.MemStats
|
|
|
|
runtime.ReadMemStats(&before)
|
|
|
|
result, err := diff.Compute(oldData, newData)
|
|
require.NoError(t, err)
|
|
|
|
runtime.ReadMemStats(&after)
|
|
|
|
allocated := after.TotalAlloc - before.TotalAlloc
|
|
|
|
t.Logf("oldData=%d lines, newData=%d lines, allocated=%s, result=%d bytes",
|
|
oldLines, newLines, formatBytes(allocated), len(result))
|
|
|
|
assert.Less(t, allocated, tt.maxMemBytes,
|
|
"memory budget exceeded: allocated %s, limit %s",
|
|
formatBytes(allocated), formatBytes(tt.maxMemBytes))
|
|
})
|
|
}
|
|
}
|
|
|
|
// formatBytes renders a byte count as a short human-readable string.
//
// It picks the largest binary unit (1024-based) that b reaches: values below
// 1024 are printed as a whole number of bytes ("512 B"), larger values with
// one decimal place in KB, MB or GB ("1.5 KB").
func formatBytes(b uint64) string {
	const (
		kib = 1024
		mib = 1024 * kib
		gib = 1024 * mib
	)

	switch {
	case b >= gib:
		return fmt.Sprintf("%.1f GB", float64(b)/gib)
	case b >= mib:
		return fmt.Sprintf("%.1f MB", float64(b)/mib)
	case b >= kib:
		return fmt.Sprintf("%.1f KB", float64(b)/kib)
	default:
		return fmt.Sprintf("%d B", b)
	}
}
|
|
|
|
// generateLargeYAML generates a YAML document with the given number of
|
|
// K8s-manifest-like entries to simulate large inline manifests configs.
|
|
func generateLargeYAML(n int) []byte {
|
|
var sb strings.Builder
|
|
|
|
for i := range n {
|
|
if i > 0 {
|
|
sb.WriteString("---\n")
|
|
}
|
|
|
|
fmt.Fprintf(&sb, "apiVersion: v1\n")
|
|
fmt.Fprintf(&sb, "kind: ConfigMap\n")
|
|
fmt.Fprintf(&sb, "metadata:\n")
|
|
fmt.Fprintf(&sb, " name: manifest-%d\n", i)
|
|
fmt.Fprintf(&sb, " namespace: default\n")
|
|
fmt.Fprintf(&sb, "data:\n")
|
|
fmt.Fprintf(&sb, " key: value-%d\n", i)
|
|
}
|
|
|
|
return []byte(sb.String())
|
|
}
|