diff --git a/promql/engine.go b/promql/engine.go index 039c5fce7b..2bb7cb4e4f 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3141,6 +3141,13 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { } func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs float64) (underCount, bucketMidpoint float64) { + zeroIfInf := func(x float64) float64 { + if math.IsInf(x, 0) { + return 0 + } + return x + } + // Case 1: Bucket with lower bound (-Inf, upper] if math.IsInf(b.Lower, -1) { // TRIM_UPPER (/) - remove values less than rhs if rhs <= b.Lower { @@ -3168,7 +3175,7 @@ func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs fl return b.Count * (1 - rhs/b.Upper), rhs / 2 } // Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belongs to an infinite bucket, we need to remove the entire bucket. - return 0, b.Upper + return 0, zeroIfInf(b.Upper) } // Case 2: Bucket with upper bound [lower, +Inf) @@ -3178,7 +3185,7 @@ func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs fl // We don't care about lower here, because: // when rhs >= lower and the bucket extends to +Inf, some values in this bucket could be > rhs, so we conservatively remove the entire bucket; // when rhs < lower, all values in this bucket are >= lower > rhs, so all values should be removed. - return 0, b.Lower + return 0, zeroIfInf(b.Lower) } // TRIM_LOWER (>/) - remove values less than rhs. if rhs <= b.Lower { @@ -3186,7 +3193,7 @@ func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs fl return b.Count, 0 } // lower < rhs: we are inside the infinity bucket, but as we don't know the exact distribution of values, we conservatively remove the entire bucket. - return 0, b.Lower + return 0, zeroIfInf(b.Lower) } panic(fmt.Errorf("one of the buckets must be infinite for handleInfinityBuckets, got %v", b)) @@ -3229,31 +3236,20 @@ func computeBucketTrim(b histogram.Bucket[float64], rhs float64, isUpperTrim, is underCount := computeSplit(b, rhs, isPositive, isCustomBucket) if isUpperTrim { - product := math.Abs(b.Lower) * math.Abs(rhs) - return underCount, computeMidpoint(b, product, isCustomBucket, isPositive) + return underCount, computeMidpoint(b.Lower, rhs, isPositive, isCustomBucket) } - product := math.Abs(rhs) * math.Abs(b.Upper) - return b.Count - underCount, computeMidpoint(b, product, isCustomBucket, isPositive) + return b.Count - underCount, computeMidpoint(rhs, b.Upper, isPositive, isCustomBucket) } // Helper function to trim native histogram buckets. // TODO: move trimHistogram to model/histogram/float_histogram.go (making it a method of FloatHistogram). func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTrim bool) { - updatedCount := 0.0 - origSum := trimmedHist.Sum - removedSum := 0.0 - isCustomBucket := trimmedHist.UsesCustomBuckets() - - var hasPositive, hasNegative bool - trackBucketSigns := func(bucket histogram.Bucket[float64]) { - if bucket.Lower < 0 { - hasNegative = true - } - if bucket.Upper > 0 { - hasPositive = true - } - } + var ( + updatedCount, updatedSum float64 + trimmedBuckets bool + isCustomBucket = trimmedHist.UsesCustomBuckets() + ) if isUpperTrim { // Calculate the fraction to keep for buckets that contain the trim value. @@ -3265,28 +3261,26 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTr continue } - keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket) - switch { case bucket.Upper <= rhs: // Bucket is entirely below the trim point - keep all. updatedCount += bucket.Count - trackBucketSigns(bucket) + bucketMidpoint := computeMidpoint(bucket.Lower, bucket.Upper, true, isCustomBucket) + updatedSum += bucketMidpoint * bucket.Count + case bucket.Lower < rhs: // Bucket contains the trim point - interpolate. - removedCount := bucket.Count - keepCount - removedSum += removedCount * bucketMidpoint - trackBucketSigns(bucket) + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket) updatedCount += keepCount + updatedSum += bucketMidpoint * keepCount trimmedHist.PositiveBuckets[i] = keepCount + trimmedBuckets = true + default: - if !isCustomBucket { - bucketMidpoint = math.Sqrt(bucket.Lower * bucket.Upper) - } - removedSum += bucket.Count * bucketMidpoint // Bucket is entirely above the trim point - discard. trimmedHist.PositiveBuckets[i] = 0 + trimmedBuckets = true } } @@ -3296,23 +3290,25 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTr continue } - keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket) - switch { case bucket.Upper <= rhs: + // Bucket is entirely below the trim point - keep all. updatedCount += bucket.Count - trackBucketSigns(bucket) - case bucket.Lower < rhs: - removedCount := bucket.Count - keepCount - removedSum += removedCount * bucketMidpoint + bucketMidpoint := computeMidpoint(bucket.Lower, bucket.Upper, false, isCustomBucket) + updatedSum += bucketMidpoint * bucket.Count + + case bucket.Lower < rhs: + // Bucket contains the trim point - interpolate. + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket) - trimmedHist.NegativeBuckets[i] = keepCount updatedCount += keepCount - trackBucketSigns(bucket) + updatedSum += bucketMidpoint * keepCount + trimmedHist.NegativeBuckets[i] = keepCount + trimmedBuckets = true + default: - bucketMidpoint = -math.Sqrt(bucket.Lower * bucket.Upper) - removedSum += bucket.Count * bucketMidpoint trimmedHist.NegativeBuckets[i] = 0 + trimmedBuckets = true } } } else { // !isUpperTrim @@ -3324,25 +3320,25 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTr continue } - keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket) - switch { case bucket.Lower >= rhs: + // Bucket is entirely below the trim point - keep all. updatedCount += bucket.Count - trackBucketSigns(bucket) - case bucket.Upper > rhs: - removedCount := bucket.Count - keepCount - removedSum += removedCount * bucketMidpoint + bucketMidpoint := computeMidpoint(bucket.Lower, bucket.Upper, true, isCustomBucket) + updatedSum += bucketMidpoint * bucket.Count + + case bucket.Upper > rhs: + // Bucket contains the trim point - interpolate. + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket) - trimmedHist.PositiveBuckets[i] = keepCount updatedCount += keepCount - trackBucketSigns(bucket) + updatedSum += bucketMidpoint * keepCount + trimmedHist.PositiveBuckets[i] = keepCount + trimmedBuckets = true + default: - if !isCustomBucket { - bucketMidpoint = math.Sqrt(bucket.Lower * bucket.Upper) - } - removedSum += bucket.Count * bucketMidpoint trimmedHist.PositiveBuckets[i] = 0 + trimmedBuckets = true } } @@ -3352,23 +3348,25 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTr continue } - keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket) - switch { case bucket.Lower >= rhs: + // Bucket is entirely below the trim point - keep all. updatedCount += bucket.Count - trackBucketSigns(bucket) - case bucket.Upper > rhs: - removedCount := bucket.Count - keepCount - removedSum += removedCount * bucketMidpoint + bucketMidpoint := computeMidpoint(bucket.Lower, bucket.Upper, false, isCustomBucket) + updatedSum += bucketMidpoint * bucket.Count + + case bucket.Upper > rhs: + // Bucket contains the trim point - interpolate. + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket) - trimmedHist.NegativeBuckets[i] = keepCount updatedCount += keepCount - trackBucketSigns(bucket) + updatedSum += bucketMidpoint * keepCount + trimmedHist.NegativeBuckets[i] = keepCount + trimmedBuckets = true + default: - bucketMidpoint = -math.Sqrt(bucket.Lower * bucket.Upper) - removedSum += bucket.Count * bucketMidpoint trimmedHist.NegativeBuckets[i] = 0 + trimmedBuckets = true } } } @@ -3379,41 +3377,42 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTr if !isUpperTrim { keepCount = trimmedHist.ZeroCount - keepCount } - trimmedHist.ZeroCount = keepCount + if trimmedHist.ZeroCount != keepCount { + trimmedHist.ZeroCount = keepCount + trimmedBuckets = true + } updatedCount += keepCount } - newSum := 0.0 - if updatedCount != 0 { - // Calculate new sum only when there are at least some observations remaining. - // Otherwise, make it zero. - newSum = origSum - removedSum + if trimmedBuckets { + // Only update the totals in case some bucket(s) were fully (or partially) trimmed. + trimmedHist.Count = updatedCount + trimmedHist.Sum = updatedSum - // Clamp correction. - if !hasNegative && newSum < 0 { - newSum = 0 - } - if !hasPositive && newSum > 0 { - newSum = 0 - } + trimmedHist.Compact(0) } - - // Update the histogram's count and sum. - trimmedHist.Count = updatedCount - trimmedHist.Sum = newSum - - trimmedHist.Compact(0) } -func computeMidpoint(b histogram.Bucket[float64], product float64, isLinear, isPositive bool) float64 { - if isLinear { - return (b.Lower + b.Upper) / 2 +func computeMidpoint(survivingIntervalBoundA, survivingIntervalBoundB float64, isPositive, isLinear bool) float64 { + if math.IsInf(survivingIntervalBoundA, 0) { + if math.IsInf(survivingIntervalBoundB, 0) { + return 0 + } + return survivingIntervalBoundB + } else if math.IsInf(survivingIntervalBoundB, 0) { + return survivingIntervalBoundA } - if isPositive { - return math.Sqrt(product) + if isLinear { + return (survivingIntervalBoundA + survivingIntervalBoundB) / 2 } - return -math.Sqrt(product) + + geoMean := math.Sqrt(math.Abs(survivingIntervalBoundA * survivingIntervalBoundB)) + + if isPositive { + return geoMean + } + return -geoMean } // vectorElemBinop evaluates a binary operation between two Vector elements. diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 7f2585ef6e..adf5f692e6 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -1894,43 +1894,43 @@ eval instant at 1m h_test / 0 - h_test {{schema:0 sum:127.28553390593274 count:30.5 z_bucket:0.5 z_bucket_w:0.001 buckets:[2 4 8 16]}} + h_test {{schema:0 sum:120.20815280171308 count:30.5 z_bucket:0.5 z_bucket_w:0.001 buckets:[2 4 8 16]}} eval instant at 1m h_test / 1.13 - h_test_2 {{schema:2 count:14.589417818876296 sum:22.078693238664073 z_bucket_w:0.001 offset:1 buckets:[0.589417818876296 4 7 3]}} + h_test_2 {{schema:2 count:14.589417818876296 sum:22.168126492693734 z_bucket_w:0.001 offset:1 buckets:[0.589417818876296 4 7 3]}} eval instant at 1m h_test_2 >/ -1.3 - h_test_2 {{schema:2 count:25.54213947904476 sum:16.183479123487956 z_bucket:1 z_bucket_w:0.001 buckets:[1 2 4 7 3] n_buckets:[1 5 1.54213947904476]}} + h_test_2 {{schema:2 count:25.54213947904476 sum:16.29588491217537 z_bucket:1 z_bucket_w:0.001 buckets:[1 2 4 7 3] n_buckets:[1 5 1.54213947904476]}} eval instant at 1m h_test_2 / 2 - h_test{} {{count:24 sum:120.21446609406726 z_bucket_w:0.001 offset:2 buckets:[8 16]}} + h_test{} {{count:24 sum:113.13708498984761 z_bucket_w:0.001 offset:2 buckets:[8 16]}} eval instant at 1m h_test >/ -1 - h_test{} {{count:32 sum:126.57842712474618 z_bucket:1 z_bucket_w:0.001 buckets:[2 4 8 16] n_buckets:[1]}} + h_test{} {{count:32 sum:119.50104602052653 z_bucket:1 z_bucket_w:0.001 buckets:[2 4 8 16] n_buckets:[1]}} eval instant at 1m h_test / 13 - cbh{} {{schema:-53 count:5.6 sum:92.5 custom_values:[5 10 15 20] offset:2 buckets:[1.6 3 1]}} + cbh{} {{schema:-53 count:5.6 sum:94.9 custom_values:[5 10 15 20] offset:2 buckets:[1.6 3 1]}} eval instant at 1m cbh / 15 # Custom buckets: trim uses linear interpolation if cutoff is inside a bucket eval instant at 1m cbh / -Inf cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}} eval instant at 1m cbh >/ 0 - cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}} + cbh {{schema:-53 sum:167.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}} eval instant at 1m cbh / 0 - zero_bucket{} {{count:7.5 sum:5.270815280171309 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}} + zero_bucket{} {{count:7.5 sum:5.656854249492381 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}} load 1m cbh_one_bucket {{schema:-53 sum:100.0 count:100 buckets:[100]}} @@ -2014,7 +2014,7 @@ load 1m # Skip (0; +Inf] bucket (100). eval instant at 1m cbh_two_buckets_split_at_zero / -10.0 - cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}} + cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:100 custom_values:[0] buckets:[0 100]}} # Skip [-Inf, 0] bucket (1). eval instant at 1m cbh_two_buckets_split_at_zero >/ 0.0 - cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}} + cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:100 custom_values:[0] buckets:[0 100]}} # Skip both buckets (1 and 100). eval instant at 1m cbh_two_buckets_split_at_zero >/ 10.0 @@ -2042,11 +2042,11 @@ load 1m # Skip (5; +Inf] bucket (100). eval instant at 1m cbh_two_buckets_split_at_positive / -10.0 - cbh_two_buckets_split_at_positive {{schema:-53 sum:28.0 count:100 custom_values:[5] buckets:[0 100]}} + cbh_two_buckets_split_at_positive {{schema:-53 sum:500.0 count:100 custom_values:[5] buckets:[0 100]}} # Keep both buckets (1 and 100). eval instant at 1m cbh_two_buckets_split_at_positive >/ 0.0 - cbh_two_buckets_split_at_positive {{schema:-53 sum:33.0 count:101 custom_values:[5] buckets:[1 100]}} + cbh_two_buckets_split_at_positive {{schema:-53 sum:500.0 count:101 custom_values:[5] buckets:[1 100]}} # Keep (5, 100] bucket (100) and 3/5 of [-Inf, 5] bucket (0.6 * 1). eval instant at 1m cbh_two_buckets_split_at_positive >/ 2.0 - cbh_two_buckets_split_at_positive {{schema:-53 sum:32.6 count:100.6 custom_values:[5] buckets:[0.6 100]}} + cbh_two_buckets_split_at_positive {{schema:-53 sum:500.6 count:100.6 custom_values:[5] buckets:[0.6 100]}} # Skip both buckets (1 and 100). eval instant at 1m cbh_two_buckets_split_at_positive >/ 10.0 @@ -2078,15 +2078,15 @@ load 1m # Skip (-5; +Inf] bucket (100). eval instant at 1m cbh_two_buckets_split_at_negative / -10.0 - cbh_two_buckets_split_at_negative {{schema:-53 sum:38.0 count:100 custom_values:[-5] buckets:[0 100]}} + cbh_two_buckets_split_at_negative {{schema:-53 sum:-500 count:100 custom_values:[-5] buckets:[0 100]}} # Skip both buckets (1 and 100). eval instant at 1m cbh_two_buckets_split_at_negative >/ -2.0