Skip to content
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
* [BUGFIX] Query-frontend: Fix silent panic when executing a remote read API request if the request has no matchers. #13745
* [BUGFIX] Ruler: Fixed `-ruler.max-rule-groups-per-tenant-by-namespace` to only count rule groups in the specified namespace instead of all namespaces. #13743
* [BUGFIX] Query-frontend: Fix race condition that could sometimes cause unnecessary resharding of queriers if querier shuffle sharding and remote execution is enabled. #13794 #13838
* [ENHANCEMENT] MQE: Include metric name in `histogram_quantile` warning/info annotations when delayed name removal is enabled. #13905

### Mixin

Expand Down
125 changes: 82 additions & 43 deletions pkg/streamingpromql/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func TestOurTestCases(t *testing.T) {
testScript := string(b)

t.Run("Mimir's engine", func(t *testing.T) {
if strings.Contains(testFile, "name_label_dropping") {
if strings.Contains(testFile, "name_label_dropping") || strings.Contains(testFile, "delayed_name_removal_enabled") {
promqltest.RunTest(t, testScript, mimirEngineWithDelayedNameRemoval)
return
}
Expand All @@ -261,7 +261,7 @@ func TestOurTestCases(t *testing.T) {
t.Skip("disabled for Prometheus' engine due to bug in Prometheus' engine")
}

if strings.Contains(testFile, "name_label_dropping") {
if strings.Contains(testFile, "name_label_dropping") || strings.Contains(testFile, "delayed_name_removal_enabled") {
promqltest.RunTest(t, testScript, prometheusEngineWithDelayedNameRemoval)
return
}
Expand Down Expand Up @@ -2201,32 +2201,60 @@ func (t *timeoutTestingQueryTracker) Close() error {
}

type annotationTestCase struct {
data string
expr string
expectedWarningAnnotations []string
expectedInfoAnnotations []string
skipComparisonWithPrometheusReason string
instantEvaluationTimestamp *time.Time
data string
expr string
expectedWarningAnnotations []string
// an alternate string for when delayed name removal is enabled.
// if not set the test will fall back to expectedWarningAnnotations
expectedWarningAnnotationsDelayedNameRemovalEnabled []string
expectedInfoAnnotations []string
// an alternate string for when delayed name removal is enabled.
// if not set the test will fall back to expectedInfoAnnotations
expectedInfoAnnotationsDelayedNameRemovalEnabled []string
skipComparisonWithPrometheusReason string
instantEvaluationTimestamp *time.Time
}

func (a annotationTestCase) getExpectedInfoAnnotations(delayedNameRemovalEnabled bool) []string {
if delayedNameRemovalEnabled && a.expectedInfoAnnotationsDelayedNameRemovalEnabled != nil {
return a.expectedInfoAnnotationsDelayedNameRemovalEnabled
}
return a.expectedInfoAnnotations
}

func (a annotationTestCase) getExpectedWarningAnnotations(delayedNameRemovalEnabled bool) []string {
if delayedNameRemovalEnabled && a.expectedWarningAnnotationsDelayedNameRemovalEnabled != nil {
return a.expectedWarningAnnotationsDelayedNameRemovalEnabled
}
return a.expectedWarningAnnotations
}

func runAnnotationTests(t *testing.T, testCases map[string]annotationTestCase) {
startT := timestamp.Time(0).Add(time.Minute)
step := time.Minute
endT := startT.Add(2 * step)

opts := NewTestEngineOpts()
planner, err := NewQueryPlanner(opts, NewMaximumSupportedVersionQueryPlanVersionProvider())
require.NoError(t, err)
mimirEngine, err := NewEngine(opts, NewStaticQueryLimitsProvider(0), stats.NewQueryMetrics(nil), planner)
require.NoError(t, err)
prometheusEngine := promql.NewEngine(opts.CommonOpts)

const prometheusEngineName = "Prometheus' engine"
engines := map[string]promql.QueryEngine{
"Mimir's engine": mimirEngine,
const mimirEngineName = "Mimir's engine"

// create 2 sets of engines - one with EnableDelayedNameRemoval=true and the other with EnableDelayedNameRemoval=false
// there are some histogram annotation test cases which will emit a different warning/info annotation string depending on the delayed name removal setting
engines := make(map[bool]map[string]promql.QueryEngine, 2)
for _, delayedNameRemovalEnabled := range []bool{true, false} {
opts := NewTestEngineOpts()
opts.CommonOpts.EnableDelayedNameRemoval = delayedNameRemovalEnabled

planner, err := NewQueryPlanner(opts, NewMaximumSupportedVersionQueryPlanVersionProvider())
require.NoError(t, err)
mimirEngine, err := NewEngine(opts, NewStaticQueryLimitsProvider(0), stats.NewQueryMetrics(nil), planner)
require.NoError(t, err)
prometheusEngine := promql.NewEngine(opts.CommonOpts)

// Compare against Prometheus' engine to verify our test cases are valid.
prometheusEngineName: prometheusEngine,
engines[delayedNameRemovalEnabled] = map[string]promql.QueryEngine{
mimirEngineName: mimirEngine,
prometheusEngineName: prometheusEngine,
}
}

for name, testCase := range testCases {
Expand All @@ -2251,33 +2279,35 @@ func runAnnotationTests(t *testing.T, testCases map[string]annotationTestCase) {

for queryType, generator := range queryTypes {
t.Run(queryType, func(t *testing.T) {
results := make([]*promql.Result, 0, 2)

for engineName, engine := range engines {
if engineName == prometheusEngineName && testCase.skipComparisonWithPrometheusReason != "" {
t.Logf("Skipping comparison with Prometheus' engine: %v", testCase.skipComparisonWithPrometheusReason)
continue
for _, delayedNameRemovalEnabled := range []bool{true, false} {
results := make([]*promql.Result, 0, 2)

for engineName, engine := range engines[delayedNameRemovalEnabled] {
if engineName == prometheusEngineName && testCase.skipComparisonWithPrometheusReason != "" {
t.Logf("Skipping comparison with Prometheus' engine: %v", testCase.skipComparisonWithPrometheusReason)
continue
}
t.Run(engineName, func(t *testing.T) {
query, err := generator(engine)
require.NoError(t, err)
t.Cleanup(query.Close)

res := query.Exec(context.Background())
require.NoError(t, res.Err)
results = append(results, res)

warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0)
require.ElementsMatch(t, testCase.getExpectedWarningAnnotations(delayedNameRemovalEnabled), warnings)
require.ElementsMatch(t, testCase.getExpectedInfoAnnotations(delayedNameRemovalEnabled), infos)
})
}
t.Run(engineName, func(t *testing.T) {
query, err := generator(engine)
require.NoError(t, err)
t.Cleanup(query.Close)

res := query.Exec(context.Background())
require.NoError(t, res.Err)
results = append(results, res)

warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0)
require.ElementsMatch(t, testCase.expectedWarningAnnotations, warnings)
require.ElementsMatch(t, testCase.expectedInfoAnnotations, infos)
})
}

// If both results are available, compare them (sometimes we skip prometheus)
if len(results) == 2 {
// We do this extra comparison to ensure that we don't skip a series that may be outputted during a warning
// or vice-versa where no result may be expected etc.
testutils.RequireEqualResults(t, testCase.expr, results[0], results[1], false)
// If both results are available, compare them (sometimes we skip prometheus)
if len(results) == 2 {
// We do this extra comparison to ensure that we don't skip a series that may be outputted during a warning
// or vice-versa where no result may be expected etc.
testutils.RequireEqualResults(t, testCase.expr, results[0], results[1], false)
}
}
})
}
Expand Down Expand Up @@ -3141,6 +3171,7 @@ func TestHistogramAnnotations(t *testing.T) {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="c"})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "abc" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "abc" for metric name "series" (1:25)`},
},
"invalid quantile warning": {
data: mixedClassicHistograms,
Expand All @@ -3151,11 +3182,13 @@ func TestHistogramAnnotations(t *testing.T) {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="a"})`,
expectedWarningAnnotations: []string{`PromQL warning: vector contains a mix of classic and native histograms (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: vector contains a mix of classic and native histograms for metric name "series" (1:25)`},
},
"forced monotonicity info": {
data: mixedClassicHistograms,
expr: `histogram_quantile(0.5, series{host="d"})`,
expectedInfoAnnotations: []string{`PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) (1:25)`},
expectedInfoAnnotationsDelayedNameRemovalEnabled: []string{`PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name "series" (1:25)`},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realise this isn't something we can control, but I find the expression here hard to follow - the "for metric name ..." bit seems out of place after the parentheses, it fits better before the parentheses to me.

Perhaps something to change upstream?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which ones do you like;

  • The histogram_quantile input for metric "series" was corrected to enforce monotonicity (1:25) - see ....
  • The histogram_quantile input for metric "series" was fixed to enforce monotonicity (1:25) - see ....
  • Corrected non-monotonic histogram_quantile input for metric "series" (1:25) - see ....
  • Fixed non-monotonic histogram_quantile input for metric "series" (1:25) - see ....

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK with tweaking the old structure:

input to histogram_quantile needed to be fixed for monotonicity for metric name "series" (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile)

},
"both mixed classic+native histogram and invalid quantile warnings": {
data: mixedClassicHistograms,
Expand All @@ -3164,6 +3197,10 @@ func TestHistogramAnnotations(t *testing.T) {
`PromQL warning: vector contains a mix of classic and native histograms (1:23)`,
`PromQL warning: quantile value should be between 0 and 1, got 9 (1:20)`,
},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{
`PromQL warning: vector contains a mix of classic and native histograms for metric name "series" (1:23)`,
`PromQL warning: quantile value should be between 0 and 1, got 9 (1:20)`,
},
},
"forced monotonicity info is not emitted when quantile is invalid": {
data: mixedClassicHistograms,
Expand All @@ -3176,6 +3213,7 @@ func TestHistogramAnnotations(t *testing.T) {
`,
expr: `histogram_quantile(0.5, series{})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" for metric name "series" (1:25)`},
},
"extra entry in series without le label": {
data: `
Expand All @@ -3184,6 +3222,7 @@ func TestHistogramAnnotations(t *testing.T) {
`,
expr: `histogram_quantile(0.5, series{})`,
expectedWarningAnnotations: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" (1:25)`},
expectedWarningAnnotationsDelayedNameRemovalEnabled: []string{`PromQL warning: bucket label "le" is missing or has a malformed value of "" for metric name "series" (1:25)`},
},
}

Expand Down
36 changes: 27 additions & 9 deletions pkg/streamingpromql/operators/functions/histogram_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@ import (

const (
// intentionallyEmptyMetricName exists for annotations compatibility with prometheus.
// Now prometheus has delayed __name__ removal. Mimir doesn't yet, and we always remove __name__.
// Because of complication in implementing this in prometheus (see https://github.com/prometheus/prometheus/pull/16794),
// the name is always dropped if delayed __name__ removal is disabled.
// The name is dropped even if the function is the first one invoked on the vector selector.
// Mimir doesn't have delayed __name__ removal, so to stay close to prometheus, we never display the label in some annotations and warnings.
// This is only used for backwards compatibility when delayed __name__ removal is not enabled.
intentionallyEmptyMetricName = ""
)

Expand Down Expand Up @@ -136,6 +132,7 @@ func NewHistogramQuantileFunction(
annotations: annotations,
innerSeriesMetricNames: innerSeriesMetricNames,
innerExpressionPosition: inner.ExpressionPosition(),
enableDelayedNameRemoval: enableDelayedNameRemoval,
},
inner: inner,
memoryConsumptionTracker: memoryConsumptionTracker,
Expand Down Expand Up @@ -298,6 +295,16 @@ func (h *HistogramFunction) NextSeries(ctx context.Context) (types.InstantVector
return h.computeOutputSeriesForGroup(thisGroup)
}

// getMetricNameForSeries returns the metric name from innerSeriesMetricNames for the given series index.
// If enableDelayedNameRemoval is not enabled, this func will return "" to maintain compatibility with Prometheus.
func (h *HistogramFunction) getMetricNameForSeries(seriesIndex int) string {
if h.enableDelayedNameRemoval {
return h.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex)
} else {
return intentionallyEmptyMetricName
}
}

// accumulateUntilGroupComplete gathers all the series associated with the given bucketGroup
// As each inner series is selected, it is added into its respective groups.
// This means a group other than the one we are focused on may get completed first, but we
Expand Down Expand Up @@ -346,7 +353,7 @@ func (h *HistogramFunction) saveFloatsToGroup(fPoints []promql.FPoint, le string
if err != nil {
// The le label was invalid. Record it:
h.annotations.Add(annotations.NewBadBucketLabelWarning(
intentionallyEmptyMetricName,
h.getMetricNameForSeries(g.lastInputSeriesIdx),
le,
h.inner.ExpressionPosition(),
))
Expand Down Expand Up @@ -427,7 +434,7 @@ func (h *HistogramFunction) computeOutputSeriesForGroup(g *bucketGroup) (types.I
// At this data point, we have classic histogram buckets and a native histogram with the same name and labels.
// No value is returned, so emit an annotation and continue.
h.annotations.Add(annotations.NewMixedClassicNativeHistogramsWarning(
intentionallyEmptyMetricName, h.inner.ExpressionPosition(),
h.getMetricNameForSeries(g.lastInputSeriesIdx), h.inner.ExpressionPosition(),
))
continue
}
Expand Down Expand Up @@ -546,6 +553,7 @@ type histogramQuantile struct {
annotations *annotations.Annotations
innerSeriesMetricNames *operators.MetricNames
innerExpressionPosition posrange.PositionRange
enableDelayedNameRemoval bool
}

func (q *histogramQuantile) LoadArguments(ctx context.Context) error {
Expand All @@ -568,13 +576,23 @@ func (q *histogramQuantile) LoadArguments(ctx context.Context) error {
return nil
}

// getMetricNameForSeries returns the metric name from innerSeriesMetricNames for the given series index.
// If enableDelayedNameRemoval is not enabled, this func will return "" to maintain compatibility with Prometheus.
func (q *histogramQuantile) getMetricNameForSeries(seriesIndex int) string {
if q.enableDelayedNameRemoval {
return q.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex)
} else {
return intentionallyEmptyMetricName
}
}

func (q *histogramQuantile) ComputeClassicHistogramResult(pointIndex int, seriesIndex int, buckets promql.Buckets) float64 {
ph := q.phValues.Samples[pointIndex].F
res, forcedMonotonicity, _ := promql.BucketQuantile(ph, buckets)

if forcedMonotonicity {
q.annotations.Add(annotations.NewHistogramQuantileForcedMonotonicityInfo(
intentionallyEmptyMetricName,
q.getMetricNameForSeries(seriesIndex),
q.innerExpressionPosition,
))
}
Expand All @@ -584,7 +602,7 @@ func (q *histogramQuantile) ComputeClassicHistogramResult(pointIndex int, series

func (q *histogramQuantile) ComputeNativeHistogramResult(pointIndex int, seriesIndex int, h *histogram.FloatHistogram) (float64, annotations.Annotations) {
ph := q.phValues.Samples[pointIndex].F
return promql.HistogramQuantile(ph, h, q.innerSeriesMetricNames.GetMetricNameForSeries(seriesIndex), q.innerExpressionPosition)
return promql.HistogramQuantile(ph, h, q.getMetricNameForSeries(seriesIndex), q.innerExpressionPosition)
}

func (q *histogramQuantile) Prepare(ctx context.Context, params *types.PrepareParams) error {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,9 @@ func runTestCasesWithDelayedNameRemovalDisabled(t *testing.T, globPattern string
if strings.Contains(testFile, "name_label_dropping") {
t.Skip("name_label_dropping tests require delayed name removal to be enabled, but optimization pass requires it to be disabled")
}
if strings.Contains(testFile, "delayed_name_removal_enabled") {
t.Skip("delayed_name_removal_enabled tests require delayed name removal to be enabled, but optimization pass requires it to be disabled")
}

f, err := testdataFS.Open(testFile)
require.NoError(t, err)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# SPDX-License-Identifier: AGPL-3.0-only
# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test
# Provenance-includes-license: Apache-2.0
# Provenance-includes-copyright: The Prometheus Authors

# Test histogram_quantile annotations.
load 1m
nonmonotonic_bucket{le="0.1"} 0+2x10
nonmonotonic_bucket{le="1"} 0+1x10
nonmonotonic_bucket{le="10"} 0+5x10
nonmonotonic_bucket{le="100"} 0+4x10
nonmonotonic_bucket{le="1000"} 0+9x10
nonmonotonic_bucket{le="+Inf"} 0+8x10
myHistogram1{abe="0.1"} 0+2x10
myHistogram2{le="Hello World"} 0+2x10
mixedHistogram{le="0.1"} 0+2x10
mixedHistogram{le="1"} 0+3x10
mixedHistogram{} {{schema:0 count:10 sum:50 buckets:[1 2 3]}}

eval instant at 1m histogram_quantile(0.5, nonmonotonic_bucket)
expect info msg: PromQL info: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile)
{} 8.5

eval instant at 1m histogram_quantile(0.5, myHistogram1)
expect warn msg: PromQL warning: bucket label "le" is missing or has a malformed value of ""

eval instant at 1m histogram_quantile(0.5, myHistogram2)
expect warn msg: PromQL warning: bucket label "le" is missing or has a malformed value of "Hello World"

eval instant at 1m histogram_quantile(0.5, mixedHistogram)
expect warn msg: PromQL warning: vector contains a mix of classic and native histograms

clear

# Test native histogram quantile and fraction when the native histogram with exponential
# buckets has NaN observations.
load 1m
histogram_nan{case="100% NaNs"} {{schema:0 count:0 sum:0}} {{schema:0 count:3 sum:NaN}}
histogram_nan{case="20% NaNs"} {{schema:0 count:0 sum:0}} {{schema:0 count:15 sum:NaN buckets:[12]}}

eval instant at 1m histogram_quantile(1, histogram_nan)
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN
{case="20% NaNs"} NaN

eval instant at 1m histogram_quantile(0.81, histogram_nan)
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN
{case="20% NaNs"} NaN

eval instant at 1m histogram_quantile(0.8, histogram_nan{case="100% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN

eval instant at 1m histogram_quantile(0.8, histogram_nan{case="20% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is skewed higher
{case="20% NaNs"} 1

eval instant at 1m histogram_quantile(0.4, histogram_nan{case="100% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is NaN
{case="100% NaNs"} NaN

# histogram_quantile and histogram_fraction equivalence if quantile is not NaN
eval instant at 1m histogram_quantile(0.4, histogram_nan{case="20% NaNs"})
expect info msg: PromQL info: input to histogram_quantile has NaN observations, result is skewed higher
{case="20% NaNs"} 0.7071067811865475
Loading
Loading