mirror of
https://github.com/siderolabs/omni.git
synced 2026-03-31 13:41:04 +02:00
The gotextdiff/myers library uses the naive Myers algorithm variant that stores the full edit trace, resulting in O((M+N)^2) space complexity. For machine configs with large inline K8s manifests (thousands of lines), this causes massive memory spikes — e.g., 80K lines allocates ~98 GB and gets OOM-killed. Replace it with neticdk/go-stdlib/diff/myers which implements the linear-space Myers variant (divide-and-conquer). Memory usage drops from ~25 GB to ~8 MB for 40K-line inputs. The diff output format is unchanged (unified diff with @@ hunks). Co-authored-by: Artem Chernyshev <artem.chernyshev@talos-systems.com> Co-authored-by: Oguz Kilcan <oguz.kilcan@siderolabs.com> Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
59 lines
2.0 KiB
Go
59 lines
2.0 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
// Package diff provides unified diff computation with bounded memory usage.
|
|
package diff
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/neticdk/go-stdlib/diff/myers"
|
|
)
|
|
|
|
// MaxLines caps the combined size of the two inputs (old + new) for which
// Compute produces a full structural diff. Size is measured as the number of
// newline bytes in each input, and the limit is inclusive: only inputs whose
// combined count is strictly greater than MaxLines are summarized. Past that
// point nobody can meaningfully review the diff, and computing it becomes
// prohibitively expensive.
const MaxLines = 75_000
|
|
|
|
// Compute returns a unified diff (without the --- / +++ header) between two
|
|
// byte slices. For inputs whose combined line count exceeds MaxLines it returns
|
|
// a short summary instead.
|
|
func Compute(previousData, newData []byte) (string, error) {
|
|
if bytes.Equal(previousData, newData) {
|
|
return "", nil
|
|
}
|
|
|
|
prevLines := bytes.Count(previousData, []byte("\n"))
|
|
newLines := bytes.Count(newData, []byte("\n"))
|
|
|
|
if prevLines+newLines > MaxLines {
|
|
return fmt.Sprintf("@@ -%d,%d +%d,%d @@ diff too large to display\n", 1, prevLines, 1, newLines), nil
|
|
}
|
|
|
|
result, err := myers.Diff(string(previousData), string(newData),
|
|
myers.WithUnifiedFormatter(),
|
|
myers.WithLinearSpace(true),
|
|
// Disable the library's standard-Myers and LCS fallback paths:
|
|
// - Standard Myers (< smallInputThreshold) is O((N+M)²) when inputs are asymmetric.
|
|
// - LCS (> largeInputThreshold) is O(N*M) for the DP table.
|
|
// By setting these to 0 and MaxLines respectively, only Hirschberg's
|
|
// O(N+M) linear-space algorithm runs. Our MaxLines guard above ensures
|
|
// inputs never exceed largeInputThreshold.
|
|
myers.WithSmallInputThreshold(0),
|
|
myers.WithLargeInputThreshold(MaxLines),
|
|
)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Strip the "--- a\n+++ b\n" header that the library always prepends.
|
|
if after, ok := strings.CutPrefix(result, "--- a\n+++ b\n"); ok {
|
|
result = after
|
|
}
|
|
|
|
return result, nil
|
|
}
|