Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
* [BUGFIX] Query-frontend: Fix silent panic when executing a remote read API request if the request has no matchers. #13745
* [BUGFIX] Ruler: Fixed `-ruler.max-rule-groups-per-tenant-by-namespace` to only count rule groups in the specified namespace instead of all namespaces. #13743
* [BUGFIX] Query-frontend: Fix race condition that could sometimes cause unnecessary resharding of queriers if querier shuffle sharding and remote execution is enabled. #13794 #13838
* [ENHANCEMENT] MQE: Include metric name in `histogram_quantile` warning/info annotations when delayed name removal is enabled. #13905

### Mixin

Expand Down
125 changes: 82 additions & 43 deletions pkg/streamingpromql/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func TestOurTestCases(t *testing.T) {
testScript := string(b)

t.Run("Mimir's engine", func(t *testing.T) {
if strings.Contains(testFile, "name_label_dropping") {
if strings.Contains(testFile, "name_label_dropping") || strings.Contains(testFile, "delayed_name_removal_enabled") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we bring this condition out of the two t.Run calls (here and on line 264 below), so that we ensure that we consistently use / don't use engines with delayed name removal?

promqltest.RunTest(t, testScript, mimirEngineWithDelayedNameRemoval)
return
}
Expand All @@ -261,7 +261,7 @@ func TestOurTestCases(t *testing.T) {
t.Skip("disabled for Prometheus' engine due to bug in Prometheus' engine")
}

if strings.Contains(testFile, "name_label_dropping") {
if strings.Contains(testFile, "name_label_dropping") || strings.Contains(testFile, "delayed_name_removal_enabled") {
promqltest.RunTest(t, testScript, prometheusEngineWithDelayedNameRemoval)
return
}
Expand Down Expand Up @@ -2201,32 +2201,60 @@ func (t *timeoutTestingQueryTracker) Close() error {
}

type annotationTestCase struct {
data string
expr string
expectedWarningAnnotations []string
expectedInfoAnnotations []string
skipComparisonWithPrometheusReason string
instantEvaluationTimestamp *time.Time
data string
expr string
expectedWarningAnnotations []string
// an alternate string for when delayed name removal is enabled.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] This allows more than just one string.

Suggested change
// an alternate string for when delayed name removal is enabled.
// an alternate set of annotations for when delayed name removal is enabled.

// if not set the test will fall back to expectedWarningAnnotations
expectedWarningAnnotationsDelayedNameRemovalEnabled []string
expectedInfoAnnotations []string
// an alternate string for when delayed name removal is enabled.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] This allows more than just one string.

Suggested change
// an alternate string for when delayed name removal is enabled.
// an alternate set of annotations for when delayed name removal is enabled.

// if not set the test will fall back to expectedInfoAnnotations
expectedInfoAnnotationsDelayedNameRemovalEnabled []string
skipComparisonWithPrometheusReason string
instantEvaluationTimestamp *time.Time
}

func (a annotationTestCase) getExpectedInfoAnnotations(delayedNameRemovalEnabled bool) []string {
if delayedNameRemovalEnabled && a.expectedInfoAnnotationsDelayedNameRemovalEnabled != nil {
return a.expectedInfoAnnotationsDelayedNameRemovalEnabled
}
return a.expectedInfoAnnotations
}

func (a annotationTestCase) getExpectedWarningAnnotations(delayedNameRemovalEnabled bool) []string {
if delayedNameRemovalEnabled && a.expectedWarningAnnotationsDelayedNameRemovalEnabled != nil {
return a.expectedWarningAnnotationsDelayedNameRemovalEnabled
}
return a.expectedWarningAnnotations
}

func runAnnotationTests(t *testing.T, testCases map[string]annotationTestCase) {
startT := timestamp.Time(0).Add(time.Minute)
step := time.Minute
endT := startT.Add(2 * step)

opts := NewTestEngineOpts()
planner, err := NewQueryPlanner(opts, NewMaximumSupportedVersionQueryPlanVersionProvider())
require.NoError(t, err)
mimirEngine, err := NewEngine(opts, NewStaticQueryLimitsProvider(0), stats.NewQueryMetrics(nil), planner)
require.NoError(t, err)
prometheusEngine := promql.NewEngine(opts.CommonOpts)

const prometheusEngineName = "Prometheus' engine"
engines := map[string]promql.QueryEngine{
"Mimir's engine": mimirEngine,
const mimirEngineName = "Mimir's engine"

// create 2 sets of engines - one with EnableDelayedNameRemoval=true and the other with EnableDelayedNameRemoval=false
// there are some histogram annotation test cases which will emit a different warning/info annotation string depending on the delayed name removal setting
engines := make(map[bool]map[string]promql.QueryEngine, 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be even simpler (and help reduce some nesting below) to define engines something like this:

Suggested change
engines := make(map[bool]map[string]promql.QueryEngine, 2)
engines := []struct{
engine promql.QueryEngine
name string // eg. "Mimir's engine without delayed name removal"
delayedNameRemovalEnabled bool
}{}

for _, delayedNameRemovalEnabled := range []bool{true, false} {
opts := NewTestEngineOpts()
opts.CommonOpts.EnableDelayedNameRemoval = delayedNameRemovalEnabled

planner, err := NewQueryPlanner(opts, NewMaximumSupportedVersionQueryPlanVersionProvider())
require.NoError(t, err)
mimirEngine, err := NewEngine(opts, NewStaticQueryLimitsProvider(0), stats.NewQueryMetrics(nil), planner)
require.NoError(t, err)
prometheusEngine := promql.NewEngine(opts.CommonOpts)

// Compare against Prometheus' engine to verify our test cases are valid.
prometheusEngineName: prometheusEngine,
engines[delayedNameRemovalEnabled] = map[string]promql.QueryEngine{
mimirEngineName: mimirEngine,
prometheusEngineName: prometheusEngine,
}
}

for name, testCase := range testCases {
Expand All @@ -2251,33 +2279,35 @@ func runAnnotationTests(t *testing.T, testCases map[string]annotationTestCase) {

for queryType, generator := range queryTypes {
t.Run(queryType, func(t *testing.T) {
results := make([]*promql.Result, 0, 2)

for engineName, engine := range engines {
if engineName == prometheusEngineName && testCase.skipComparisonWithPrometheusReason != "" {
t.Logf("Skipping comparison with Prometheus' engine: %v", testCase.skipComparisonWithPrometheusReason)
continue
for _, delayedNameRemovalEnabled := range []bool{true, false} {
results := make([]*promql.Result, 0, 2)
Comment on lines +2282 to +2283
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should call t.Run here with a name that makes it clear if delayed name removal was enabled or disabled. (Otherwise we'll get two nested tests for each engine, both with the same name, eg. "Mimir's engine".)


for engineName, engine := range engines[delayedNameRemovalEnabled] {
Comment on lines +2282 to +2285
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for _, delayedNameRemovalEnabled := range []bool{true, false} {
results := make([]*promql.Result, 0, 2)
for engineName, engine := range engines[delayedNameRemovalEnabled] {
for engines, delayedNameRemovalEnabled := range engines {
results := make([]*promql.Result, 0, 2)
for engineName, engine := range engines {

if engineName == prometheusEngineName && testCase.skipComparisonWithPrometheusReason != "" {
t.Logf("Skipping comparison with Prometheus' engine: %v", testCase.skipComparisonWithPrometheusReason)
continue
}
t.Run(engineName, func(t *testing.T) {
query, err := generator(engine)
require.NoError(t, err)
t.Cleanup(query.Close)

res := query.Exec(context.Background())
require.NoError(t, res.Err)
results = append(results, res)

warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0)
require.ElementsMatch(t, testCase.getExpectedWarningAnnotations(delayedNameRemovalEnabled), warnings)
require.ElementsMatch(t, testCase.getExpectedInfoAnnotations(delayedNameRemovalEnabled), infos)
})
}
t.Run(engineName, func(t *testing.T) {
query, err := generator(engine)
require.NoError(t, err)
t.Cleanup(query.Close)

res := query.Exec(context.Background())
require.NoError(t, res.Err)
results = append(results, res)

warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0)
require.ElementsMatch(t, testCase.expectedWarningAnnotations, warnings)
require.ElementsMatch(t, testCase.expectedInfoAnnotations, infos)
})
}

// If both results are available, compare them (sometimes we skip prometheus)
if len(results) == 2 {
// We do this extra comparison to ensure that we don't skip a series that may be outputted during a warning
// or vice-versa where no result may be expected etc.
testutils.RequireEqualResults(t, testCase.expr, results[0], results[1], false)
// If both results are available, compare them (sometimes we skip prometheus)
if len(results) == 2 {
// We do this extra comparison to ensure that we don't skip a series that may be outputted during a warning
// or vice-versa where no result may be expected etc.
testutils.RequireEqualResults(t, testCase.expr, results[0], results[1], false)
}
}
})
}
Expand Down Expand Up @@ -3141,6 +3171,7 @@ func TestHistogramAnnotations(t *testing.T) {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="c"})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "abc" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "abc" for metric name "series" (1:25)`},
},
"invalid quantile warning": {
data: mixedClassicHistograms,
Expand All @@ -3151,11 +3182,13 @@ func TestHistogramAnnotations(t *testing.T) {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="a"})`,
expectedWarningAnnotations: []string{`PromQL warning: vector contains a mix of classic and native histograms (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: vector contains a mix of classic and native histograms for metric name "series" (1:25)`},
},
"forced monotonicity info": {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="d"})`,
expectedInfoAnnotations: []string{`PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) (1:25)`},
expectedInfoAnnotationsDelayedNameRemovalEnabled: []string{`PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name "series" (1:25)`},
},
"both mixed classic+native histogram and invalid quantile warnings": {
data: mixedClassicHistograms,
Expand All @@ -3164,6 +3197,10 @@ func TestHistogramAnnotations(t *testing.T) {
`PromQL warning: vector contains a mix of classic and native histograms (1:23)`,
`PromQL warning: quantile value should be between 0 and 1, got 9 (1:20)`,
},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{
`PromQL warning: vector contains a mix of classic and native histograms for metric name "series" (1:23)`,
`PromQL warning: quantile value should be between 0 and 1, got 9 (1:20)`,
},
},
"forced monotonicity info is not emitted when quantile is invalid": {
data: mixedClassicHistograms,
Expand All @@ -3176,6 +3213,7 @@ func TestHistogramAnnotations(t *testing.T) {
`,
expr: `histogram_quantile(0.5, series{})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" for metric name "series" (1:25)`},
},
"extra entry in series without le label": {
data: `
Expand All @@ -3184,6 +3222,7 @@ func TestHistogramAnnotations(t *testing.T) {
`,
expr: `histogram_quantile(0.5, series{})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" for metric name "series" (1:25)`},
},
}

Expand Down
36 changes: 27 additions & 9 deletions pkg/streamingpromql/operators/functions/histogram_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@ import (

const (
// intentionallyEmptyMetricName exists for annotations compatibility with prometheus.
// Now prometheus has delayed __name__ removal. Mimir doesn't yet, and we always remove __name__.
// Because of complication in implementing this in prometheus (see https://github.com/prometheus/prometheus/pull/16794),
// the name is always dropped if delayed __name__ removal is disabled.
// The name is dropped even if the function is the first one invoked on the vector selector.
// Mimir doesn't have delayed __name__ removal, so to stay close to prometheus, we never display the label in some annotations and warnings.
// This is only used for backwards compatibility when delayed __name__ removal is not enabled.
intentionallyEmptyMetricName = ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given this is only referenced in one place (getMetricNameForSeries) now, should we move this constant and the comment into there?

We could probably also drop the constant and just move the comment to explain why we return an empty value.

)

Expand Down Expand Up @@ -136,6 +132,7 @@ func NewHistogramQuantileFunction(
annotations: annotations,
innerSeriesMetricNames: innerSeriesMetricNames,
innerExpressionPosition: inner.ExpressionPosition(),
enableDelayedNameRemoval: enableDelayedNameRemoval,
},
inner: inner,
memoryConsumptionTracker: memoryConsumptionTracker,
Expand Down Expand Up @@ -298,6 +295,16 @@ func (h *HistogramFunction) NextSeries(ctx context.Context) (types.InstantVector
return h.computeOutputSeriesForGroup(thisGroup)
}

// getMetricNameForSeries returns the metric name from innerSeriesMetricNames for the given series index.
// If enableDelayedNameRemoval is not enabled, this func will return "" to maintain compatibility with Prometheus.
func (h *HistogramFunction) getMetricNameForSeries(seriesIndex int) string {
if h.enableDelayedNameRemoval {
return h.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex)
} else {
return intentionallyEmptyMetricName
}
}

// accumulateUntilGroupComplete gathers all the series associated with the given bucketGroup
// As each inner series is selected, it is added into its respective groups.
// This means a group other than the one we are focused on may get completed first, but we
Expand Down Expand Up @@ -346,7 +353,7 @@ func (h *HistogramFunction) saveFloatsToGroup(fPoints []promql.FPoint, le string
if err != nil {
// The le label was invalid. Record it:
h.annotations.Add(annotations.NewBadBucketLabelWarning(
intentionallyEmptyMetricName,
h.getMetricNameForSeries(g.lastInputSeriesIdx),
le,
h.inner.ExpressionPosition(),
))
Expand Down Expand Up @@ -427,7 +434,7 @@ func (h *HistogramFunction) computeOutputSeriesForGroup(g *bucketGroup) (types.I
// At this data point, we have classic histogram buckets and a native histogram with the same name and labels.
// No value is returned, so emit an annotation and continue.
h.annotations.Add(annotations.NewMixedClassicNativeHistogramsWarning(
intentionallyEmptyMetricName, h.inner.ExpressionPosition(),
h.getMetricNameForSeries(g.lastInputSeriesIdx), h.inner.ExpressionPosition(),
))
continue
}
Expand Down Expand Up @@ -546,6 +553,7 @@ type histogramQuantile struct {
annotations *annotations.Annotations
innerSeriesMetricNames *operators.MetricNames
innerExpressionPosition posrange.PositionRange
enableDelayedNameRemoval bool
}

func (q *histogramQuantile) LoadArguments(ctx context.Context) error {
Expand All @@ -568,13 +576,23 @@ func (q *histogramQuantile) LoadArguments(ctx context.Context) error {
return nil
}

// getMetricNameForSeries returns the metric name from innerSeriesMetricNames for the given series index.
// If enableDelayedNameRemoval is not enabled, this func will return "" to maintain compatibility with Prometheus.
func (q *histogramQuantile) getMetricNameForSeries(seriesIndex int) string {
if q.enableDelayedNameRemoval {
return q.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex)
} else {
return intentionallyEmptyMetricName
}
}

func (q *histogramQuantile) ComputeClassicHistogramResult(pointIndex int, seriesIndex int, buckets promql.Buckets) float64 {
ph := q.phValues.Samples[pointIndex].F
res, forcedMonotonicity, _ := promql.BucketQuantile(ph, buckets)

if forcedMonotonicity {
q.annotations.Add(annotations.NewHistogramQuantileForcedMonotonicityInfo(
intentionallyEmptyMetricName,
q.getMetricNameForSeries(seriesIndex),
q.innerExpressionPosition,
))
}
Expand All @@ -584,7 +602,7 @@ func (q *histogramQuantile) ComputeClassicHistogramResult(pointIndex int, series

func (q *histogramQuantile) ComputeNativeHistogramResult(pointIndex int, seriesIndex int, h *histogram.FloatHistogram) (float64, annotations.Annotations) {
ph := q.phValues.Samples[pointIndex].F
return promql.HistogramQuantile(ph, h, q.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex), q.innerExpressionPosition)
return promql.HistogramQuantile(ph, h, q.getMetricNameForSeries(seriesIndex), q.innerExpressionPosition)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

histogram_fraction ignores enableDelayedNameRemoval flag unlike histogram_quantile

The histogramFraction struct was not updated to include an enableDelayedNameRemoval field and corresponding getMetricNameForSeries method like histogramQuantile was. In ComputeNativeHistogramResult, histogram_fraction directly calls f.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex) unconditionally, while histogram_quantile now uses q.getMetricNameForSeries(seriesIndex) which respects the delayed name removal setting. This creates inconsistent behavior where histogram_fraction always includes metric names in annotations regardless of the enableDelayedNameRemoval flag, contradicting the PR's stated goal that metric names are "only included...if EnableDelayedNameRemoval is enabled."

Additional Locations (1)

Fix in Cursor Fix in Web

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR relates only to histogram_quantile

}

func (q *histogramQuantile) Prepare(ctx context.Context, params *types.PrepareParams) error {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,9 @@ func runTestCasesWithDelayedNameRemovalDisabled(t *testing.T, globPattern string
if strings.Contains(testFile, "name_label_dropping") {
t.Skip("name_label_dropping tests require delayed name removal to be enabled, but optimization pass requires it to be disabled")
}
if strings.Contains(testFile, "delayed_name_removal_enabled") {
t.Skip("delayed_name_removal_enabled tests require delayed name removal to be enabled, but optimization pass requires it to be disabled")
}

f, err := testdataFS.Open(testFile)
require.NoError(t, err)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# SPDX-License-Identifier: AGPL-3.0-only
# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test
# Provenance-includes-license: Apache-2.0
# Provenance-includes-copyright: The Prometheus Authors

# Test histogram_quantile annotations.
load 1m
nonmonotonic_bucket{le="0.1"} 0+2x10
nonmonotonic_bucket{le="1"} 0+1x10
nonmonotonic_bucket{le="10"} 0+5x10
nonmonotonic_bucket{le="100"} 0+4x10
nonmonotonic_bucket{le="1000"} 0+9x10
nonmonotonic_bucket{le="+Inf"} 0+8x10
myHistogram1{abe="0.1"} 0+2x10
myHistogram2{le="Hello World"} 0+2x10
mixedHistogram{le="0.1"} 0+2x10
mixedHistogram{le="1"} 0+3x10
mixedHistogram{} {{schema:0 count:10 sum:50 buckets:[1 2 3]}}

eval instant at 1m histogram_quantile(0.5, nonmonotonic_bucket)
expect info msg: PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile)
{} 8.5

eval instant at 1m histogram_quantile(0.5, myHistogram1)
expect warn msg: PromQL warning: bucket label "le" is missing or has a malformed value of ""

eval instant at 1m histogram_quantile(0.5, myHistogram2)
expect warn msg: PromQL warning: bucket label "le" is missing or has a malformed value of "Hello World"

eval instant at 1m histogram_quantile(0.5, mixedHistogram)
expect warn msg: PromQL warning: vector contains a mix of classic and native histograms

clear

# Test native histogram quantile and fraction when the native histogram with exponential
# buckets has NaN observations.
load 1m
histogram_nan{case="100% NaNs"} {{schema:0 count:0 sum:0}} {{schema:0 count:3 sum:NaN}}
histogram_nan{case="20% NaNs"} {{schema:0 count:0 sum:0}} {{schema:0 count:15 sum:NaN buckets:[12]}}

eval instant at 1m histogram_quantile(1, histogram_nan)
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN
{case="20% NaNs"} NaN

eval instant at 1m histogram_quantile(0.81, histogram_nan)
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN
{case="20% NaNs"} NaN

eval instant at 1m histogram_quantile(0.8, histogram_nan{case="100% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN

eval instant at 1m histogram_quantile(0.8, histogram_nan{case="20% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is skewed higher
{case="20% NaNs"} 1

eval instant at 1m histogram_quantile(0.4, histogram_nan{case="100% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN

# histogram_quantile and histogram_fraction equivalence if quantile is not NaN
eval instant at 1m histogram_quantile(0.4, histogram_nan{case="20% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is skewed higher
{case="20% NaNs"} 0.7071067811865475
Loading
Loading