From 5f72154afd460852e917aee109bc396a451220aa Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Fri, 22 May 2026 17:26:44 +0000 Subject: [PATCH] perf(ingester): lazy regex evaluation on head postings cache miss When the expanded postings cache misses on the head block, regex matchers on high-cardinality labels (e.g. pod with 400K+ values) dominate query cost. This PR defers expensive regex matchers to a lazy per-series evaluation when a selective equality matcher already narrows the result set significantly. On cache miss, splitMatchersForHeadWithConfig splits matchers into: - Selective matchers (equality, low-card regex) for postings lookup - Lazy matchers (high-card regex) applied per-series via LabelValueFor A cost-ratio gate decides when deferral is worthwhile: - Simple regex (single contains, prefix): cardinality > selectivePostings * 6 - Complex regex (multi-substring, capture groups): cardinality > selectivePostings * 2 Label cardinality lookups are cached in an expirable LRU (60s TTL) to avoid repeated LabelValues calls under load. Benchmark (realistic pod names, 413K cardinality, 9K selective postings): - Eager: 62ms, 29.8MB per query - Lazy: 14ms, 12.6MB per query (4.5x faster, 58% less memory) New flags (disabled by default with max-cardinality=0): - blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality - blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio - blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio --- CHANGELOG.md | 1 + docs/blocks-storage/querier.md | 19 + docs/blocks-storage/store-gateway.md | 19 + docs/configuration/config-file-reference.md | 19 + integration/query_fuzz_test.go | 274 ++++++++++ .../ingester_lazy_posting_bench_test.go | 288 ++++++++++ pkg/storage/tsdb/expanded_postings_cache.go | 107 +++- pkg/storage/tsdb/lazy_matchers.go | 333 ++++++++++++ pkg/storage/tsdb/lazy_matchers_test.go | 499 ++++++++++++++++++ schemas/cortex-config-schema.json | 18 + 10 files changed, 1574 insertions(+), 3 deletions(-) create mode 100644 pkg/ingester/ingester_lazy_posting_bench_test.go create mode 100644 pkg/storage/tsdb/lazy_matchers.go create mode 100644 pkg/storage/tsdb/lazy_matchers_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d5e891243..7d81fa4d17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ * [ENHANCEMENT] Distributor: Add HMAC-SHA256 stream authentication for `PushStream` via `-distributor.sign-write-requests-keys`. #7475 * [ENHANCEMENT] Instrument Ingester CPU profile with source for read APIs. #7494 * [ENHANCEMENT] Ingester: Convert expanded postings cache from FIFO to LRU eviction to retain frequently-queried entries under memory pressure. #7510 +* [ENHANCEMENT] Ingester: Add lazy regex evaluation on head postings cache miss. Defers expensive regex matchers on high-cardinality labels to per-series filtering when a selective equality matcher already narrows the result set. Configured via `-blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality` (disabled by default). #7553 * [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370 * [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380 * [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389 diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index 24bc6a4c3a..f9d7dfe0fc 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -1970,6 +1970,25 @@ blocks_storage: # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout [fetch_timeout: | default = 0s] + # Maximum label cardinality for deferring regex matchers on the head + # block. When a regex matcher targets a label with more unique values than + # this threshold, it is applied lazily during iteration instead of + # postings lookup. 0 disables. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality + [lazy_matcher_max_cardinality: | default = 0] + + # Cardinality:postings ratio above which a simple regex (prefix-only, + # single contains) is deferred to lazy iteration. Lower = more aggressive + # deferral. Calibrated empirically; defaults to 6. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio + [lazy_matcher_simple_cost_ratio: | default = 6] + + # Cardinality:postings ratio above which a complex regex (multi-substring, + # capture groups, character classes) is deferred. Lower = more aggressive + # deferral. Calibrated empirically; defaults to 2. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio + [lazy_matcher_complex_cost_ratio: | default = 2] + users_scanner: # Strategy to use to scan users. Supported values are: list, user_index. # CLI flag: -blocks-storage.users-scanner.strategy diff --git a/docs/blocks-storage/store-gateway.md b/docs/blocks-storage/store-gateway.md index 965a9089f2..9f3258e66a 100644 --- a/docs/blocks-storage/store-gateway.md +++ b/docs/blocks-storage/store-gateway.md @@ -2028,6 +2028,25 @@ blocks_storage: # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout [fetch_timeout: | default = 0s] + # Maximum label cardinality for deferring regex matchers on the head + # block. When a regex matcher targets a label with more unique values than + # this threshold, it is applied lazily during iteration instead of + # postings lookup. 0 disables. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality + [lazy_matcher_max_cardinality: | default = 0] + + # Cardinality:postings ratio above which a simple regex (prefix-only, + # single contains) is deferred to lazy iteration. Lower = more aggressive + # deferral. Calibrated empirically; defaults to 6. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio + [lazy_matcher_simple_cost_ratio: | default = 6] + + # Cardinality:postings ratio above which a complex regex (multi-substring, + # capture groups, character classes) is deferred. Lower = more aggressive + # deferral. Calibrated empirically; defaults to 2. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio + [lazy_matcher_complex_cost_ratio: | default = 2] + users_scanner: # Strategy to use to scan users. Supported values are: list, user_index. # CLI flag: -blocks-storage.users-scanner.strategy diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index b71490c2d9..dc1ab9071d 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2650,6 +2650,25 @@ tsdb: # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout [fetch_timeout: | default = 0s] + # Maximum label cardinality for deferring regex matchers on the head block. + # When a regex matcher targets a label with more unique values than this + # threshold, it is applied lazily during iteration instead of postings + # lookup. 0 disables. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality + [lazy_matcher_max_cardinality: | default = 0] + + # Cardinality:postings ratio above which a simple regex (prefix-only, single + # contains) is deferred to lazy iteration. Lower = more aggressive deferral. + # Calibrated empirically; defaults to 6. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio + [lazy_matcher_simple_cost_ratio: | default = 6] + + # Cardinality:postings ratio above which a complex regex (multi-substring, + # capture groups, character classes) is deferred. Lower = more aggressive + # deferral. Calibrated empirically; defaults to 2. + # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio + [lazy_matcher_complex_cost_ratio: | default = 2] + users_scanner: # Strategy to use to scan users. Supported values are: list, user_index. # CLI flag: -blocks-storage.users-scanner.strategy diff --git a/integration/query_fuzz_test.go b/integration/query_fuzz_test.go index f735bd91d5..a011ae8c8e 100644 --- a/integration/query_fuzz_test.go +++ b/integration/query_fuzz_test.go @@ -662,6 +662,280 @@ func TestExpandedPostingsCacheFuzz(t *testing.T) { } } +// TestLazyMatchersFuzz fuzzes PromQL queries against two cortex instances with +// identical data: +// - cortex-1: head expanded-postings cache enabled, lazy matcher DISABLED +// (the eager path - regex applied during postings lookup). +// - cortex-2: head expanded-postings cache enabled, lazy matcher ENABLED +// with aggressive thresholds (cardinality=1, both cost ratios=1) so the +// optimization fires on every regex matcher. +// +// The test verifies: +// 1. Query results match between the two instances (correctness). +// 2. The cortex_ingester_expanded_postings_lazy_matcher_queries_total counter +// is incremented on cortex-2 (the optimization actually triggers). +func TestLazyMatchersFuzz(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + // Start dependencies. + consul1 := e2edb.NewConsulWithName("consul1") + consul2 := e2edb.NewConsulWithName("consul2") + require.NoError(t, s.StartAndWaitReady(consul1, consul2)) + + baseFlags := mergeFlags( + AlertmanagerLocalFlags(), + map[string]string{ + "-store.engine": blocksStorageEngine, + "-blocks-storage.backend": "filesystem", + "-blocks-storage.tsdb.head-compaction-interval": "4m", + "-blocks-storage.tsdb.block-ranges-period": "2h", + "-blocks-storage.tsdb.ship-interval": "1h", + "-blocks-storage.bucket-store.sync-interval": "15m", + "-blocks-storage.tsdb.retention-period": "2h", + "-blocks-storage.bucket-store.index-cache.backend": tsdb.IndexCacheBackendInMemory, + "-blocks-storage.bucket-store.bucket-index.enabled": "true", + "-blocks-storage.expanded_postings_cache.head.enabled": "true", + "-blocks-storage.expanded_postings_cache.block.enabled": "true", + "-distributor.replication-factor": "1", + "-store-gateway.sharding-enabled": "false", + "-alertmanager.web.external-url": "http://localhost/alertmanager", + // The alertmanager initializes a memberlist gossip ring that auto- + // detects a private RFC1918 IP. On Docker networks where containers + // get non-private IPs (e.g. the 240.0.0.0/4 reserved range), this + // detection hard-fails. Setting an explicit advertise address skips + // the autodetection — the value is unused since we don't enable HA + // peers, but presence of the flag is enough. + "-alertmanager.cluster.advertise-address": "127.0.0.1:9094", + }, + ) + + // cortex-1: eager path. Lazy matcher disabled (default). + flags1 := mergeFlags(baseFlags, map[string]string{ + "-ring.store": "consul", + "-consul.hostname": consul1.NetworkHTTPEndpoint(), + "-ingester.matchers-cache-max-items": "10000", + }) + + // cortex-2: lazy path. Aggressive thresholds force the optimization to + // fire on essentially every regex matcher, so we exercise the lazy code + // path repeatedly for correctness verification. + flags2 := mergeFlags(baseFlags, map[string]string{ + "-ring.store": "consul", + "-consul.hostname": consul2.NetworkHTTPEndpoint(), + "-ingester.matchers-cache-max-items": "10000", + "-blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality": "1", + "-blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio": "1", + "-blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio": "1", + }) + + require.NoError(t, writeFileToSharedDir(s, "alertmanager_configs", []byte{})) + + path1 := path.Join(s.SharedDir(), "cortex-1") + path2 := path.Join(s.SharedDir(), "cortex-2") + flags1 = mergeFlags(flags1, map[string]string{"-blocks-storage.filesystem.dir": path1}) + flags2 = mergeFlags(flags2, map[string]string{"-blocks-storage.filesystem.dir": path2}) + + // Both instances use the local build. + cortex1 := e2ecortex.NewSingleBinary("cortex-1", flags1, "") + cortex2 := e2ecortex.NewSingleBinary("cortex-2", flags2, "") + require.NoError(t, s.StartAndWaitReady(cortex1, cortex2)) + + require.NoError(t, cortex1.WaitSumMetrics(e2e.Equals(float64(512)), "cortex_ring_tokens_total")) + require.NoError(t, cortex2.WaitSumMetrics(e2e.Equals(float64(512)), "cortex_ring_tokens_total")) + + c1, err := e2ecortex.NewClient(cortex1.HTTPEndpoint(), cortex1.HTTPEndpoint(), "", "", "user-1") + require.NoError(t, err) + c2, err := e2ecortex.NewClient(cortex2.HTTPEndpoint(), cortex2.HTTPEndpoint(), "", "", "user-1") + require.NoError(t, err) + + now := time.Now() + start := now.Add(-24 * time.Hour) + scrapeInterval := 30 * time.Second + + // Build a fixture with multiple labels, including a high-cardinality + // "pod"-style label so regex matchers from promqlsmith actually exercise + // the deferral path. With lazy-matcher-max-cardinality=1, any label with + // >1 unique value is eligible. + numSeries := 10 + numberOfLabelsPerSeries := 5 + numSamples := 10 + ss := make([]prompb.TimeSeries, numSeries*numberOfLabelsPerSeries) + lbls := make([]labels.Labels, numSeries*numberOfLabelsPerSeries) + + for i := 0; i < numSeries; i++ { + for j := 0; j < numberOfLabelsPerSeries; j++ { + series := e2e.GenerateSeriesWithSamples( + fmt.Sprintf("test_series_%d", i), + start, + scrapeInterval, + i*numSamples, + numSamples, + prompb.Label{Name: "test_label", Value: fmt.Sprintf("test_label_value_%d", j)}, + prompb.Label{Name: "pod", Value: fmt.Sprintf("test_pod_%d_%d", i, j)}, + ) + ss[i*numberOfLabelsPerSeries+j] = series + + builder := labels.NewBuilder(labels.EmptyLabels()) + for _, lbl := range series.Labels { + builder.Set(lbl.Name, lbl.Value) + } + lbls[i*numberOfLabelsPerSeries+j] = builder.Labels() + } + } + + for _, client := range []*e2ecortex.Client{c1, c2} { + res, err := client.Push(ss) + require.NoError(t, err) + require.Equal(t, 200, res.StatusCode) + } + + rnd := rand.New(rand.NewSource(now.Unix())) + opts := []promqlsmith.Option{ + promqlsmith.WithEnabledAggrs(enabledAggrs), + } + ps := promqlsmith.New(rnd, lbls, opts...) + + // Regex patterns that exercise different cost classes in the lazy matcher gate. + // Each pattern matches a SUBSET of pods (not all), so both =~ and !~ queries + // return non-empty results, verifying correctness with actual data. + regexPatterns := []string{ + ".*_0_.*", // single contains (simple) — 5/50 pods + ".*_[0-4]_[0-2]", // character class (complex) — 15/50 pods + "test_pod_[5-9]_.*", // prefix + class (complex) — 25/50 pods + ".*pod_3.*", // single contains (simple) — 5/50 pods + "(test_pod_1|test_pod_2)_.*", // alternation (complex) — 10/50 pods + } + + testRun := 300 + queries := make([]string, 0, testRun*2) + matchers := make([]string, 0, testRun) + for i := 0; i < testRun; i++ { + expr := ps.WalkRangeQuery() + if !isValidQuery(expr, true) { + continue + } + queries = append(queries, expr.Pretty(0)) + + // Each matcher set includes a __name__= anchor + a regex on pod, + // guaranteeing the lazy matcher optimization fires on every cache miss. + regex := regexPatterns[i%len(regexPatterns)] + matchers = append(matchers, storepb.PromMatchersToString( + append( + ps.WalkSelectors(), + labels.MustNewMatcher(labels.MatchEqual, "__name__", fmt.Sprintf("test_series_%d", i%numSeries)), + labels.MustNewMatcher(labels.MatchRegexp, "pod", regex), + )...)) + + // Also generate a direct PromQL query with the regex so the instant/range + // query path exercises the lazy matcher too. Include iteration index in + // a != matcher to force unique cache keys (cache miss on every query). + queries = append(queries, fmt.Sprintf(`test_series_%d{pod=~"%s",test_label!="iter_%d"}`, i%numSeries, regex, i)) + // Also test negative regex (!~) to exercise that code path. + queries = append(queries, fmt.Sprintf(`test_series_%d{pod!~"%s",test_label!="iter_%d_neg"}`, i%numSeries, regex, i)) + } + + type testCase struct { + query string + qt string + res1, res2 model.Value + sres1, sres2 []model.LabelSet + err1, err2 error + } + + cases := make([]*testCase, 0, len(queries)*2+len(matchers)) + + // Data spans [start, start + (numSamples-1)*scrapeInterval]. Constrain + // fuzzed timestamps to this window so queries actually hit the head block. + dataEnd := start.Add(scrapeInterval * time.Duration(numSamples-1)) + dataWindowMs := dataEnd.Sub(start).Milliseconds() + + for _, query := range queries { + fuzzyTime := time.Duration(rand.Int63n(dataWindowMs)) + queryEnd := start.Add(fuzzyTime * time.Millisecond) + res1, err1 := c1.Query(query, queryEnd) + res2, err2 := c2.Query(query, queryEnd) + cases = append(cases, &testCase{ + query: query, qt: "instant", + res1: res1, res2: res2, err1: err1, err2: err2, + }) + res1, err1 = c1.QueryRange(query, start, queryEnd, scrapeInterval) + res2, err2 = c2.QueryRange(query, start, queryEnd, scrapeInterval) + cases = append(cases, &testCase{ + query: query, qt: "range query", + res1: res1, res2: res2, err1: err1, err2: err2, + }) + } + + for _, m := range matchers { + fuzzyTime := time.Duration(rand.Int63n(dataWindowMs)) + queryEnd := start.Add(fuzzyTime * time.Millisecond) + res1, err := c1.Series([]string{m}, start, queryEnd) + require.NoError(t, err) + res2, err := c2.Series([]string{m}, start, queryEnd) + require.NoError(t, err) + cases = append(cases, &testCase{ + query: m, qt: "get series", + sres1: res1, sres2: res2, + }) + } + + failures := 0 + for i, tc := range cases { + if tc.err1 != nil || tc.err2 != nil { + if !cmp.Equal(tc.err1, tc.err2) { + t.Logf("case %d error mismatch.\n%s: %s\nerr1: %v\nerr2: %v\n", i, tc.qt, tc.query, tc.err1, tc.err2) + failures++ + } + } else if shouldUseSampleNumComparer(tc.query) { + if !cmp.Equal(tc.res1, tc.res2, sampleNumComparer) { + t.Logf("case %d # of samples mismatch.\n%s: %s\nres1: %s\nres2: %s\n", i, tc.qt, tc.query, tc.res1.String(), tc.res2.String()) + failures++ + } + } else if !cmp.Equal(tc.res1, tc.res2, comparer) { + t.Logf("case %d results mismatch.\n%s: %s\nres1: %s\nres2: %s\n", i, tc.qt, tc.query, tc.res1.String(), tc.res2.String()) + failures++ + } else if !cmp.Equal(tc.sres1, tc.sres2, labelSetsComparer) { + t.Logf("case %d series results mismatch.\n%s: %s\nsres1: %s\nsres2: %s\n", i, tc.qt, tc.query, tc.sres1, tc.sres2) + failures++ + } + } + if failures > 0 { + require.Failf(t, "finished lazy matcher fuzzing tests", "%d test cases failed", failures) + } + + // Verify the lazy-matcher optimization was actually triggered on cortex-2. + // If the gate is misconfigured or the test fixture doesn't exercise the + // path, this guards against silent regressions where the optimization + // becomes a no-op. + + // Diagnostic: print related counters before the assertion so failures + // can be debugged from the test output. + for _, m := range []string{ + "cortex_ingester_queries", + "cortex_ingester_queried_series", + "cortex_ingester_queried_chunks", + "cortex_ingester_expanded_postings_cache_requests_total", + "cortex_ingester_expanded_postings_cache_hits_total", + "cortex_ingester_expanded_postings_non_cacheable_queries_total", + "cortex_ingester_expanded_postings_lazy_matcher_queries_total", + } { + v, _ := cortex2.SumMetrics([]string{m}) + t.Logf("cortex-2 %s = %v", m, v) + } + + require.NoError(t, cortex2.WaitSumMetrics(e2e.Greater(0), + "cortex_ingester_expanded_postings_lazy_matcher_queries_total")) + + // Sanity check: cortex-1 (eager) should NEVER increment this counter. + c1Lazy, err := cortex1.SumMetrics([]string{"cortex_ingester_expanded_postings_lazy_matcher_queries_total"}) + if err == nil && len(c1Lazy) > 0 { + require.Equal(t, float64(0), c1Lazy[0], + "cortex-1 has lazy matcher disabled but the metric is non-zero") + } +} + func TestVerticalShardingFuzz(t *testing.T) { s, err := e2e.NewScenario(networkName) require.NoError(t, err) diff --git a/pkg/ingester/ingester_lazy_posting_bench_test.go b/pkg/ingester/ingester_lazy_posting_bench_test.go new file mode 100644 index 0000000000..fee2e0f770 --- /dev/null +++ b/pkg/ingester/ingester_lazy_posting_bench_test.go @@ -0,0 +1,288 @@ +package ingester + +import ( + "context" + "fmt" + "strconv" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/store/storepb" + "github.com/weaveworks/common/user" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/util/services" + "github.com/cortexproject/cortex/pkg/util/test" +) // BenchmarkIngester_LazyPosting exercises the lazy-matcher-max-cardinality +// optimization on the head block cache miss path. +// +// The scenarios are based on the benchmark configurations below: +// - "small_select_huge_regex_label": __name__ selects 1% of series, regex +// targets a 100K-cardinality label. This is the primary case the lazy +// matcher optimization is designed for; it should be a clear win. +// - "balanced_select_huge_regex_label": __name__ selects 50% of series. +// Lazy LabelValueFor calls are now done on a large set; could go either way. +// - "small_select_low_card_regex": regex label has only 100 distinct values. +// Below the cardinality threshold; optimization should not engage and +// overhead must be near zero. +// - "small_select_complex_regex": __name__ selects 1%, but regex is complex +// (.*lit.*lit.*) — exercises both the cardinality saving and the cost of +// running complex regex per-series during lazy filter. +// +// Each scenario runs: +// - cache_disabled : baseline; no cache, eager regex +// - cache_enabled_eager : cache enabled, lazy disabled (current default) +// - cache_enabled_lazy : cache enabled + lazy-matcher threshold +// +// The benchmark forces a cache miss by clearing the seed before each iteration. +func BenchmarkIngester_LazyPosting(b *testing.B) { + scenarios := []struct { + name string + // Series cardinality knobs + nameValues int // number of distinct __name__ values + seriesPerName int // series per metric name (drives __name__= selectivity) + podCardinality int // distinct pod values across all series + podSharedAcrossName bool + // Query + matchers []*labels.Matcher + expectedHits int // sanity check; >0 means some series matched + expectMatches bool + }{ + { + name: "small_select_huge_regex_label", + nameValues: 50, // 50 distinct metric names + seriesPerName: 2000, // each metric has 2000 series → __name__=cpu selects 2K of 100K + podCardinality: 100000, + podSharedAcrossName: false, + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"), + }, + expectMatches: true, + }, + { + name: "balanced_select_huge_regex_label", + nameValues: 2, // only 2 metric names + seriesPerName: 50000, // each has 50K series → __name__= selects 50K of 100K + podCardinality: 100000, + podSharedAcrossName: false, + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"), + }, + expectMatches: true, + }, + { + name: "small_select_low_card_regex", + nameValues: 50, + seriesPerName: 2000, + podCardinality: 100, // below default threshold + podSharedAcrossName: true, + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"), + }, + expectMatches: true, + }, + { + name: "small_select_complex_regex", + nameValues: 50, + seriesPerName: 2000, + podCardinality: 100000, + podSharedAcrossName: false, + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + // .*foo.*bar.* — multi-substring contains; complex per-call + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod.*1.*"), + }, + expectMatches: true, + }, + { + name: "small_select_capture_regex", + nameValues: 50, + seriesPerName: 2000, + podCardinality: 100000, + podSharedAcrossName: false, + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + // (foo|bar).* — capture group alternation, common in envoy_authority + labels.MustNewMatcher(labels.MatchRegexp, "pod", "(pod-1|pod-2).*"), + }, + expectMatches: true, + }, + } + + configs := []struct { + name string + cacheEnabled bool + lazyCardinality int + }{ + {name: "no_cache", cacheEnabled: false, lazyCardinality: 0}, + {name: "cache_eager", cacheEnabled: true, lazyCardinality: 0}, + {name: "cache_lazy_10k", cacheEnabled: true, lazyCardinality: 10000}, + {name: "cache_lazy_1k", cacheEnabled: true, lazyCardinality: 1000}, + } + + for _, sc := range scenarios { + b.Run(sc.name, func(b *testing.B) { + for _, cfg := range configs { + b.Run(cfg.name, func(b *testing.B) { + runLazyPostingBenchmark(b, sc.nameValues, sc.seriesPerName, sc.podCardinality, + sc.podSharedAcrossName, cfg.cacheEnabled, cfg.lazyCardinality, sc.matchers, + sc.expectMatches) + }) + } + }) + } +} + +// BenchmarkIngester_LazyPosting_CacheHit measures cache-hit overhead. +// The lazy optimization must not slow down the hit path. +func BenchmarkIngester_LazyPosting_CacheHit(b *testing.B) { + matchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"), + } + + for _, lazy := range []int{0, 10000} { + name := "eager" + if lazy > 0 { + name = "lazy" + } + b.Run(name, func(b *testing.B) { + runCacheHitBenchmark(b, 50, 2000, 100000, matchers, lazy) + }) + } +} + +func runLazyPostingBenchmark( + b *testing.B, + nameValues, seriesPerName, podCardinality int, + podSharedAcrossName bool, + cacheEnabled bool, + lazyCardinality int, + matchers []*labels.Matcher, + expectMatches bool, +) { + b.Helper() + const userID = "test" + cfg := defaultIngesterTestConfig(b) + if cacheEnabled { + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled = true + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.MaxBytes = 100 * 1024 * 1024 + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Ttl = time.Hour + cfg.BlocksStorageConfig.TSDB.PostingsCache.LazyMatcherMaxCardinality = lazyCardinality + } + + i, err := prepareIngesterWithBlocksStorage(b, cfg, prometheus.NewRegistry()) + require.NoError(b, err) + require.NoError(b, services.StartAndAwaitRunning(context.Background(), i)) + defer func() { _ = services.StopAndAwaitTerminated(context.Background(), i) }() + + test.Poll(b, time.Second, ring.ACTIVE, func() any { return i.lifecycler.GetState() }) + + ctx := user.InjectOrgID(context.Background(), userID) + pushSeries(b, i, ctx, nameValues, seriesPerName, podCardinality, podSharedAcrossName) + + db, err := i.getTSDB(userID) + require.NoError(b, err) + require.NotNil(b, db) + + mockStream := &mockQueryStreamServer{ctx: ctx} + sm := (&storepb.ShardInfo{TotalShards: 0}).Matcher(nil) + + // Warm up once and validate + numSeries, _, _, _, err := i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream) + require.NoError(b, err) + if expectMatches { + require.Greater(b, numSeries, 0, "scenario must produce matches") + } + mockStream.series = mockStream.series[:0] + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + // Force cache miss by mutating the seed for this metric name. + // The seed-by-hash map is keyed by (userID, metricName); we bump it + // to invalidate any cached promise for this query. + if cacheEnabled && db.postingCache != nil { + db.postingCache.ExpireSeries(labels.FromStrings(model.MetricNameLabel, "metric_0")) + } + _, _, _, _, err := i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream) + require.NoError(b, err) + mockStream.series = mockStream.series[:0] + } +} + +func runCacheHitBenchmark(b *testing.B, nameValues, seriesPerName, podCardinality int, matchers []*labels.Matcher, lazyCardinality int) { + const userID = "test" + cfg := defaultIngesterTestConfig(b) + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled = true + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.MaxBytes = 100 * 1024 * 1024 + cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Ttl = time.Hour + cfg.BlocksStorageConfig.TSDB.PostingsCache.LazyMatcherMaxCardinality = lazyCardinality + + i, err := prepareIngesterWithBlocksStorage(b, cfg, prometheus.NewRegistry()) + require.NoError(b, err) + require.NoError(b, services.StartAndAwaitRunning(context.Background(), i)) + defer func() { _ = services.StopAndAwaitTerminated(context.Background(), i) }() + test.Poll(b, time.Second, ring.ACTIVE, func() any { return i.lifecycler.GetState() }) + + ctx := user.InjectOrgID(context.Background(), userID) + pushSeries(b, i, ctx, nameValues, seriesPerName, podCardinality, false) + + db, err := i.getTSDB(userID) + require.NoError(b, err) + + mockStream := &mockQueryStreamServer{ctx: ctx} + sm := (&storepb.ShardInfo{TotalShards: 0}).Matcher(nil) + + // Prime the cache + _, _, _, _, err = i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream) + require.NoError(b, err) + mockStream.series = mockStream.series[:0] + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, _, _, _, err := i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream) + require.NoError(b, err) + mockStream.series = mockStream.series[:0] + } +} + +// pushSeries creates `nameValues` distinct __name__ values, each with `seriesPerName` +// series. Each series gets a unique pod label drawn from `podCardinality` distinct values. +// When podSharedAcrossName is true, the same pod values are reused across name values +// (otherwise pods are distinct per name to inflate label cardinality). +// +// Pushes one series at a time using writeRequestSingleSeries, which is the +// proven-working pattern in the existing benchmarks. Slow but reliable. +func pushSeries(b *testing.B, i *Ingester, ctx context.Context, nameValues, seriesPerName, podCardinality int, podSharedAcrossName bool) { + b.Helper() + sample := []cortexpb.Sample{{Value: 1, TimestampMs: 1}} + for n := range nameValues { + metric := fmt.Sprintf("metric_%d", n) + for s := range seriesPerName { + var podIdx int + if podSharedAcrossName { + podIdx = s % podCardinality + } else { + podIdx = (n*seriesPerName + s) % podCardinality + } + lbls := labels.FromStrings( + model.MetricNameLabel, metric, + "pod", "pod-"+strconv.Itoa(podIdx), + "region", "region-"+strconv.Itoa(s%10), + "job", "job-"+strconv.Itoa(s%20), + ) + _, err := i.Push(ctx, writeRequestSingleSeries(lbls, sample)) + require.NoError(b, err) + } + } +} diff --git a/pkg/storage/tsdb/expanded_postings_cache.go b/pkg/storage/tsdb/expanded_postings_cache.go index a241607c8a..1ae001a197 100644 --- a/pkg/storage/tsdb/expanded_postings_cache.go +++ b/pkg/storage/tsdb/expanded_postings_cache.go @@ -10,6 +10,7 @@ import ( "sync" "time" + "github.com/hashicorp/golang-lru/v2/expirable" "github.com/oklog/ulid/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -42,6 +43,7 @@ type ExpandedPostingsCacheMetrics struct { CacheEvicts *prometheus.CounterVec CacheMiss *prometheus.CounterVec NonCacheableQueries *prometheus.CounterVec + LazyMatcherQueries prometheus.Counter } func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetrics { @@ -66,6 +68,10 @@ func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetri Name: "cortex_ingester_expanded_postings_non_cacheable_queries_total", Help: "Total number of non cacheable queries.", }, []string{"cache"}), + LazyMatcherQueries: promauto.With(r).NewCounter(prometheus.CounterOpts{ + Name: "cortex_ingester_expanded_postings_lazy_matcher_queries_total", + Help: "Total number of queries that used lazy matcher evaluation on cache miss.", + }), } } @@ -73,6 +79,25 @@ type TSDBPostingsCacheConfig struct { Head PostingsCacheConfig `yaml:"head" doc:"description=If enabled, ingesters will cache expanded postings for the head block. Only queries with with an equal matcher for metric __name__ are cached."` Blocks PostingsCacheConfig `yaml:"blocks" doc:"description=If enabled, ingesters will cache expanded postings for the compacted blocks. The cache is shared between all blocks."` + // LazyMatcherMaxCardinality configures the maximum label cardinality threshold for + // deferring regex matchers on the head block. When a regex matcher targets a label with + // more unique values than this threshold, the matcher is applied lazily during series + // iteration instead of during postings lookup. This avoids expensive regex scans on + // high-cardinality labels when the head postings cache misses. 0 disables this optimization. + LazyMatcherMaxCardinality int `yaml:"lazy_matcher_max_cardinality"` + + // LazyMatcherSimpleCostRatio is the cardinality:postings ratio above which a + // regex matcher with a simple per-call cost (prefix-only, single contains, + // single suffix) is deferred to lazy iteration. Tuned empirically — see + // regexCostClass for derivation. Defaults to 6 when unset. + LazyMatcherSimpleCostRatio int `yaml:"lazy_matcher_simple_cost_ratio"` + + // LazyMatcherComplexCostRatio is the cardinality:postings ratio above which a + // regex matcher with a complex per-call cost (multi-substring contains, + // capture groups with literals, character classes) is deferred. Defaults to 2 + // when unset. + LazyMatcherComplexCostRatio int `yaml:"lazy_matcher_complex_cost_ratio"` + // The configurations below are used only for testing purpose PostingsForMatchers func(ctx context.Context, ix tsdb.IndexReader, ms ...*labels.Matcher) (index.Postings, error) `yaml:"-"` SeedSize int `yaml:"-"` @@ -89,6 +114,9 @@ type PostingsCacheConfig struct { func (cfg *TSDBPostingsCacheConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { cfg.Head.RegisterFlagsWithPrefix(prefix, "head", f) cfg.Blocks.RegisterFlagsWithPrefix(prefix, "block", f) + f.IntVar(&cfg.LazyMatcherMaxCardinality, prefix+"expanded_postings_cache.head.lazy-matcher-max-cardinality", 0, "Maximum label cardinality for deferring regex matchers on the head block. When a regex matcher targets a label with more unique values than this threshold, it is applied lazily during iteration instead of postings lookup. 0 disables.") + f.IntVar(&cfg.LazyMatcherSimpleCostRatio, prefix+"expanded_postings_cache.head.lazy-matcher-simple-cost-ratio", defaultSimpleCostRatio, "Cardinality:postings ratio above which a simple regex (prefix-only, single contains) is deferred to lazy iteration. Lower = more aggressive deferral. Calibrated empirically; defaults to 6.") + f.IntVar(&cfg.LazyMatcherComplexCostRatio, prefix+"expanded_postings_cache.head.lazy-matcher-complex-cost-ratio", defaultComplexCostRatio, "Cardinality:postings ratio above which a complex regex (multi-substring, capture groups, character classes) is deferred. Lower = more aggressive deferral. Calibrated empirically; defaults to 2.") } // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet @@ -139,6 +167,9 @@ type blocksPostingsForMatchersCache struct { postingsForMatchersFunc func(ctx context.Context, ix tsdb.IndexReader, ms ...*labels.Matcher) (index.Postings, error) timeNow func() time.Time + lazyMatcherCfg lazyMatcherConfig + labelCardinalityCache *expirable.LRU[string, int] + metrics *ExpandedPostingsCacheMetrics seedByHash *seedByHash } @@ -162,9 +193,15 @@ func newBlocksPostingsForMatchersCache(userId string, cfg TSDBPostingsCacheConfi blocksCache: newLruCache[[]storage.SeriesRef](cfg.Blocks, "block", metrics, cfg.timeNow), postingsForMatchersFunc: cfg.PostingsForMatchers, timeNow: cfg.timeNow, - metrics: metrics, - seedByHash: seedByHash, - userId: userId, + lazyMatcherCfg: lazyMatcherConfig{ + MaxCardinality: cfg.LazyMatcherMaxCardinality, + SimpleRatio: cfg.LazyMatcherSimpleCostRatio, + ComplexRatio: cfg.LazyMatcherComplexCostRatio, + }, + labelCardinalityCache: newLabelCardinalityCache(), + metrics: metrics, + seedByHash: seedByHash, + userId: userId, } } @@ -232,6 +269,16 @@ func (c *blocksPostingsForMatchersCache) fetchPostings(blockID ulid.ULID, ix tsd defer cancel() } + // For head blocks, try to avoid expensive regex scans by splitting matchers: + // resolve postings with selective matchers only, then filter by regex lazily. + if isHeadBlock(blockID) && c.lazyMatcherCfg.MaxCardinality > 0 { + selectMs, lazyMs := splitMatchersForHeadWithConfig(fetchCtx, ix, ms, c.lazyMatcherCfg, c.labelCardinalityCache) + if len(lazyMs) > 0 { + c.metrics.LazyMatcherQueries.Inc() + return c.fetchWithLazyMatchers(fetchCtx, ix, selectMs, lazyMs) + } + } + postings, err := c.postingsForMatchersFunc(fetchCtx, ix, ms...) if err == nil { @@ -265,6 +312,60 @@ func (c *blocksPostingsForMatchersCache) result(ce *cacheEntryPromise[[]storage. } } +// fetchWithLazyMatchers resolves postings using only the selective matchers, then +// filters the results by applying the lazy (regex) matchers per-series using +// LabelValueFor. A per-value cache avoids running the same regex on the same value +// more than once. +func (c *blocksPostingsForMatchersCache) fetchWithLazyMatchers(ctx context.Context, ix tsdb.IndexReader, selectMs, lazyMs []*labels.Matcher) ([]storage.SeriesRef, int64, error) { + postings, err := c.postingsForMatchersFunc(ctx, ix, selectMs...) + if err != nil { + return nil, 0, err + } + + ids, err := index.ExpandPostings(postings) + if err != nil { + return nil, 0, err + } + + // Per-matcher cache: label value -> match result + caches := make([]map[string]bool, len(lazyMs)) + for i := range lazyMs { + caches[i] = make(map[string]bool) + } + + filtered := ids[:0] + for _, id := range ids { + matches := true + for i, m := range lazyMs { + val, err := ix.LabelValueFor(ctx, id, m.Name) + if err != nil { + // Series doesn't have this label — treat as empty string + val = "" + } + + if result, ok := caches[i][val]; ok { + if !result { + matches = false + break + } + continue + } + + result := m.Matches(val) + caches[i][val] = result + if !result { + matches = false + break + } + } + if matches { + filtered = append(filtered, id) + } + } + + return filtered, int64(len(filtered) * 8), nil +} + func (c *blocksPostingsForMatchersCache) getSeedForMetricName(metricName string) string { return c.seedByHash.getSeed(c.userId, metricName) } diff --git a/pkg/storage/tsdb/lazy_matchers.go b/pkg/storage/tsdb/lazy_matchers.go new file mode 100644 index 0000000000..6b50ff98be --- /dev/null +++ b/pkg/storage/tsdb/lazy_matchers.go @@ -0,0 +1,333 @@ +package tsdb + +import ( + "context" + "strings" + "time" + + "github.com/hashicorp/golang-lru/v2/expirable" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + prom_tsdb "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/index" +) + +// regexCost classifies how expensive per-call evaluation of a regex matcher +// is, relative to a single LabelValueFor call. We use this to choose the +// cardinality:postings ratio gate that decides whether to defer a regex +// matcher to lazy iteration on the head block. +// +// Calibration is empirical, from BenchmarkIngester_LazyPosting: +// - LabelValueFor on the head ≈ 1µs per call (lock + map lookup + label parse) +// - simple regex eval (prefix-only / single contains via FastRegexMatcher +// fast paths) ≈ 200ns per call +// - complex regex eval (multi-substring containsInOrder, capture groups, +// character classes - falls through to RE2 or multi-Index) ≈ 1µs+ per call +// +// Cost model: +// +// eager_cost ≈ cardinality * regex_per_call_cost +// lazy_cost ≈ selective_postings * (LabelValueFor_cost + regex_per_call_cost) +// +// Lazy is a win when: +// +// selective_postings * (LV + regex) < cardinality * regex +// ⇒ cardinality / selective_postings > (LV + regex) / regex +// +// For simple regex (regex ≈ 200ns, LV ≈ 1µs): ratio > 6 (with margin → 6). +// For complex regex (regex ≈ 1µs, LV ≈ 1µs): ratio > 2. +type regexCost int + +const ( + // regexCostUnknown is a defensive sentinel returned by regexCostClass for + // non-regex matchers. Production code never reaches this path (callers + // type-check before invoking). + regexCostUnknown regexCost = iota + // regexCostSimple covers regexes that the FastRegexMatcher fast-paths via + // setMatches, prefix anchoring, suffix anchoring, or single-contains. Per-call + // cost is dominated by the underlying string op, not RE2 evaluation. + regexCostSimple + // regexCostComplex covers everything else: multi-substring contains + // (.*a.*b.*), alternation of contains, capture groups with siblings, + // character classes, lookaheads, etc. Per-call cost includes the full RE2 + // fallback or multi-step containsInOrder. + regexCostComplex +) + +// Calibrated default cost ratios. Used both as struct field defaults and by +// the flag registration in TSDBPostingsCacheConfig.RegisterFlagsWithPrefix. +// See regexCost docstring for derivation. +const ( + defaultSimpleCostRatio = 6 + defaultComplexCostRatio = 2 +) + +// regexCostClass returns the per-call cost class of a regex matcher. +// +// Notes: +// - Matchers with non-empty SetMatches() are short-circuited by the +// postingsForMatcher fast-path and never reach our lazy code, but we +// classify them as simple for safety. +// - Matchers with a non-empty Prefix() are also fast-pathed differently +// in postingsForLabelMatching (containsInOrder fast-reject), so they're +// classified as simple. +// - Negative regex (MatchNotRegexp) is classified the same as MatchRegexp +// since the per-call evaluation cost is identical. +func regexCostClass(m *labels.Matcher) regexCost { + if m.Type != labels.MatchRegexp && m.Type != labels.MatchNotRegexp { + return regexCostUnknown + } + + v := m.GetRegexString() + + // Prefix-only regex (e.g. `foo.*`): the FastRegexMatcher uses HasPrefix + // per call. But a regex like `^foo[0-9]+$` ALSO has Prefix()=="foo" + // while requiring full RE2 evaluation on positive matches. We can + // distinguish by checking the regex string: the prefix is "simple" only + // when the remainder of the regex is trivially-matching (.* or empty). + if p := m.Prefix(); p != "" { + if isPureLiteralPrefix(v, p) { + return regexCostSimple + } + // Prefix exists but the remainder is non-trivial — RE2 still runs + // on positive matches. + return regexCostComplex + } + + // At this point the regex has no setMatches and no prefix. Use the regex + // string to detect the remaining "simple" shapes the FastRegexMatcher + // optimizes specially. + switch { + case isSingleContainsRegex(v): + // .*foo.* — vanilla extracts m.contains=["foo"], runs containsInOrder + // once per value (single strings.Index call). Per-call ≈ regex cost. + return regexCostSimple + case isPureSuffixRegex(v): + // .*foo — extracted as m.suffix; HasSuffix per call. + return regexCostSimple + } + + return regexCostComplex +} + +// isPureLiteralPrefix returns true when the regex string is just +// optionally followed by `.*` or `.*$` (trivial tail). This is the +// pattern shape the FastRegexMatcher fully fast-paths via HasPrefix +// without falling through to RE2. +func isPureLiteralPrefix(regex, prefix string) bool { + // Strip optional ^ anchor. + r := strings.TrimPrefix(regex, "^") + // The regex must start with the literal prefix. + if !strings.HasPrefix(r, prefix) { + return false + } + rest := r[len(prefix):] + // Strip optional $ anchor. + rest = strings.TrimSuffix(rest, "$") + // Trailing must be empty (anchored exact prefix), `.*` (any tail), or + // `.+` (any non-empty tail). Anything else (character class, alternation, + // nested groups, additional literals) requires the full regex engine. + return rest == "" || rest == ".*" || rest == ".+" +} + +// isSingleContainsRegex returns true for `.*.*` patterns where +// contains no regex metacharacters. +func isSingleContainsRegex(s string) bool { + if !strings.HasPrefix(s, ".*") || !strings.HasSuffix(s, ".*") || len(s) <= 4 { + return false + } + inner := s[2 : len(s)-2] + return inner != "" && !containsRegexMeta(inner) +} + +// isPureSuffixRegex returns true for `.*` patterns where +// contains no regex metacharacters and the pattern has no trailing .* +// (otherwise it's single-contains). +func isPureSuffixRegex(s string) bool { + if !strings.HasPrefix(s, ".*") || strings.HasSuffix(s, ".*") { + return false + } + return !containsRegexMeta(s[2:]) +} + +// containsRegexMeta reports whether s contains any regex metacharacter. +func containsRegexMeta(s string) bool { + for i := 0; i < len(s); i++ { + switch s[i] { + case '.', '+', '*', '?', '|', '(', ')', '[', ']', '{', '}', '\\', '^', '$': + return true + } + } + return false +} + +// lazyMatcherConfig configures the cost-ratio gates used by +// splitMatchersForHeadWithConfig. Zero-valued SimpleRatio/ComplexRatio +// fields are treated as "use the calibrated default" (defaultSimpleCostRatio +// and defaultComplexCostRatio respectively), NOT as "no margin" — guarding +// callers who construct the config struct programmatically without going +// through flag registration. +type lazyMatcherConfig struct { + // MaxCardinality is the floor cardinality below which a label is never + // considered for lazy evaluation, regardless of selectivity. + MaxCardinality int + // SimpleRatio is the cardinality:postings ratio above which simple regex + // matchers are deferred. Tuned empirically; see regexCostClass docs. + // 0 means "use defaultSimpleCostRatio". + SimpleRatio int + // ComplexRatio is the cardinality:postings ratio above which complex regex + // matchers are deferred. 0 means "use defaultComplexCostRatio". + ComplexRatio int +} + +// splitMatchersForHeadWithConfig separates matchers into those used for postings +// lookup and those applied lazily during iteration, using the configured cost +// ratios. See lazyMatcherConfig. +// +// A matcher is deferred only when ALL of: +// - The query already contains a __name__ equality matcher (anchors selectivity) +// - The matcher is a regex or negative regex on a non-__name__ label +// - The label's cardinality exceeds MaxCardinality +// - cardinality > minSelectPostings * ratio, where ratio depends on the +// regex's per-call cost class (see regexCostClass) +func splitMatchersForHeadWithConfig(ctx context.Context, ix prom_tsdb.IndexReader, ms []*labels.Matcher, cfg lazyMatcherConfig, cardinalityCache *expirable.LRU[string, int]) (selectMatchers, lazyMatchers []*labels.Matcher) { + if cfg.MaxCardinality <= 0 || len(ms) < 2 { + return ms, nil + } + // Treat zero-valued ratios as "use the calibrated default", not as + // "ratio of 1" (which would silently fall back to the original broken + // gate). This protects programmatic callers who construct the config + // without flag-registration defaults. + if cfg.SimpleRatio < 1 { + cfg.SimpleRatio = defaultSimpleCostRatio + } + if cfg.ComplexRatio < 1 { + cfg.ComplexRatio = defaultComplexCostRatio + } + + hasMetricNameMatcher := false + for _, m := range ms { + if m.Name == labels.MetricName && m.Type == labels.MatchEqual { + hasMetricNameMatcher = true + break + } + } + if !hasMetricNameMatcher { + return ms, nil + } + + // First pass: identify regex matchers that are candidates for deferral and + // estimate the number of series the selective (equality) matchers would return. + type regexCandidate struct { + matcher *labels.Matcher + cardinality int + cost regexCost + } + + var candidates []regexCandidate + selectMatchers = make([]*labels.Matcher, 0, len(ms)) + minSelectPostings := 0 + + for _, m := range ms { + if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp { + // Never defer __name__ regex matchers. + if m.Name == labels.MetricName { + selectMatchers = append(selectMatchers, m) + continue + } + + // Matchers with SetMatches (e.g. "foo|bar|baz") are resolved via + // direct posting lookups in postingsForMatcher — already fast. + // Never defer these. + if len(m.SetMatches()) > 0 { + selectMatchers = append(selectMatchers, m) + continue + } + + // Check if the label has high cardinality. + cardinality := labelCardinality(ctx, ix, m.Name, cardinalityCache) + if cardinality <= cfg.MaxCardinality { + selectMatchers = append(selectMatchers, m) + continue + } + + candidates = append(candidates, regexCandidate{ + matcher: m, + cardinality: cardinality, + cost: regexCostClass(m), + }) + continue + } + + selectMatchers = append(selectMatchers, m) + if m.Type == labels.MatchEqual { + if n := postingsLen(ctx, ix, m.Name, m.Value); n > 0 { + if minSelectPostings == 0 || n < minSelectPostings { + minSelectPostings = n + } + } + } + } + + if len(candidates) == 0 || minSelectPostings == 0 { + return ms, nil + } + + for _, c := range candidates { + ratio := cfg.SimpleRatio + if c.cost == regexCostComplex { + ratio = cfg.ComplexRatio + } + // Defer only when lazy iteration is cheaper than the eager scan. + // Cost model: cardinality * regex_per_call > selective_postings * (LV + regex). + if c.cardinality > minSelectPostings*ratio { + lazyMatchers = append(lazyMatchers, c.matcher) + } else { + selectMatchers = append(selectMatchers, c.matcher) + } + } + + if len(lazyMatchers) == 0 { + return ms, nil + } + + return selectMatchers, lazyMatchers +} + +// postingsLen returns the number of series matching a single label pair. +// For the head block, Postings() for a single value returns a *ListPostings +// directly, so Len() is O(1) — just a slice length read. +func postingsLen(ctx context.Context, ix prom_tsdb.IndexReader, name, value string) int { + p, err := ix.Postings(ctx, name, value) + if err != nil { + return 0 + } + if lp, ok := p.(*index.ListPostings); ok { + return lp.Len() + } + return 0 +} + +const ( + labelCardinalityTTL = 60 * time.Second + labelCardinalityCacheSize = 10000 // max label names cached per tenant +) + +// newLabelCardinalityCache creates a bounded, TTL-expiring cache for label cardinality. +func newLabelCardinalityCache() *expirable.LRU[string, int] { + return expirable.NewLRU[string, int](labelCardinalityCacheSize, nil, labelCardinalityTTL) +} + +// labelCardinality returns the number of unique values for a label, using a +// cache to avoid repeated LabelValues calls on the head block. +func labelCardinality(ctx context.Context, ix prom_tsdb.IndexReader, name string, cache *expirable.LRU[string, int]) int { + if v, ok := cache.Get(name); ok { + return v + } + vals, err := ix.LabelValues(ctx, name, (*storage.LabelHints)(nil)) + if err != nil { + return 0 + } + cache.Add(name, len(vals)) + return len(vals) +} diff --git a/pkg/storage/tsdb/lazy_matchers_test.go b/pkg/storage/tsdb/lazy_matchers_test.go new file mode 100644 index 0000000000..8ea641b4a2 --- /dev/null +++ b/pkg/storage/tsdb/lazy_matchers_test.go @@ -0,0 +1,499 @@ +package tsdb + +import ( + "context" + "testing" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + prom_tsdb "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/index" + "github.com/stretchr/testify/assert" +) + +func TestSplitMatchersForHead(t *testing.T) { + ctx := context.Background() + + ir := &mockIndexReader{ + labelValues: map[string][]string{ + "__name__": {"cpu", "memory", "disk"}, + "pod": generateValues("pod-", 50000), + "namespace": {"prod", "staging", "dev"}, + "service": {"api", "worker", "gateway", "frontend", "backend"}, + "job": {"api", "worker", "gateway"}, + }, + postingsCounts: map[string]int{ + "__name__\xffcpu": 1000, + "__name__\xffmemory": 800, + "service\xffapi": 200, + "service\xffworker": 300, + "namespace\xffprod": 500, + "namespace\xffstaging": 300, + "namespace\xffdev": 200, + }, + } + + tests := []struct { + name string + matchers []*labels.Matcher + maxCardinality int + wantSelect int + wantLazy int + wantLazyLabels []string + }{ + { + name: "regex on high-cardinality label with selective equality matcher - deferred", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "service", "api"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"), + }, + maxCardinality: 10000, + wantSelect: 2, // __name__ + service + wantLazy: 1, + wantLazyLabels: []string{"pod"}, + }, + { + name: "regex on high-cardinality label with only __name__ equality - deferred (name is selective)", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"), + }, + maxCardinality: 10000, + wantSelect: 1, + wantLazy: 1, + wantLazyLabels: []string{"pod"}, + }, + { + name: "regex on low-cardinality label - NOT deferred regardless", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "service", "api"), + labels.MustNewMatcher(labels.MatchRegexp, "namespace", "prod|staging"), + }, + maxCardinality: 10000, + wantSelect: 3, // namespace only has 3 values, below threshold + wantLazy: 0, + }, + { + name: "no __name__ matcher - nothing deferred", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"), + labels.MustNewMatcher(labels.MatchEqual, "namespace", "prod"), + }, + maxCardinality: 10000, + wantSelect: 2, + wantLazy: 0, + }, + { + name: "disabled when maxCardinality is 0", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "service", "api"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"), + }, + maxCardinality: 0, + wantSelect: 3, + wantLazy: 0, + }, + { + name: "negative regex on high-cardinality with selective matcher - deferred", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "namespace", "prod"), + labels.MustNewMatcher(labels.MatchNotRegexp, "pod", ".*test.*"), + }, + maxCardinality: 10000, + wantSelect: 2, + wantLazy: 1, + wantLazyLabels: []string{"pod"}, + }, + { + name: "__name__ regex is never deferred", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "service", "api"), + labels.MustNewMatcher(labels.MatchRegexp, "__name__", "cpu|memory"), + }, + maxCardinality: 1, + wantSelect: 3, + wantLazy: 0, + }, + { + name: "single matcher - nothing deferred", + matchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + }, + maxCardinality: 1, + wantSelect: 1, + wantLazy: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + selectMs, lazyMs := splitMatchersForHeadWithConfig(ctx, ir, tt.matchers, lazyMatcherConfig{ + MaxCardinality: tt.maxCardinality, + // Use ratio=1 so this test continues to assert on the old + // (cardinality > minSelectPostings) gate semantics. Cost-ratio + // behavior is covered separately in TestSplitMatchersForHead_CostRatio. + SimpleRatio: 1, + ComplexRatio: 1, + }, newLabelCardinalityCache()) + assert.Len(t, selectMs, tt.wantSelect, "select matchers count") + assert.Len(t, lazyMs, tt.wantLazy, "lazy matchers count") + + for i, name := range tt.wantLazyLabels { + assert.Equal(t, name, lazyMs[i].Name) + } + }) + } +} + +func TestFetchWithLazyMatchers(t *testing.T) { + ctx := context.Background() + + // Build an in-memory head with known series + ir := &mockIndexReaderWithSeries{ + mockIndexReader: mockIndexReader{ + labelValues: map[string][]string{ + "__name__": {"cpu"}, + "pod": {"web-1", "web-2", "worker-1", "worker-2", "api-1"}, + "service": {"frontend", "backend"}, + }, + }, + series: map[storage.SeriesRef]labels.Labels{ + 1: labels.FromStrings("__name__", "cpu", "pod", "web-1", "service", "frontend"), + 2: labels.FromStrings("__name__", "cpu", "pod", "web-2", "service", "frontend"), + 3: labels.FromStrings("__name__", "cpu", "pod", "worker-1", "service", "backend"), + 4: labels.FromStrings("__name__", "cpu", "pod", "worker-2", "service", "backend"), + 5: labels.FromStrings("__name__", "cpu", "pod", "api-1", "service", "backend"), + }, + } + + cache := &blocksPostingsForMatchersCache{ + postingsForMatchersFunc: func(_ context.Context, ix prom_tsdb.IndexReader, ms ...*labels.Matcher) (index.Postings, error) { + // Simulate: selectMs = [__name__="cpu", service="frontend"] -> returns refs 1, 2 + return index.NewListPostings([]storage.SeriesRef{1, 2, 3, 4, 5}[:2]), nil + }, + } + + selectMs := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + labels.MustNewMatcher(labels.MatchEqual, "service", "frontend"), + } + lazyMs := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "pod", "web.*"), + } + + refs, size, err := cache.fetchWithLazyMatchers(ctx, ir, selectMs, lazyMs) + assert.NoError(t, err) + assert.Equal(t, int64(len(refs)*8), size) + // Both series 1 and 2 have pod=web-*, so both should match + assert.Equal(t, []storage.SeriesRef{1, 2}, refs) +} + +func TestFetchWithLazyMatchers_FiltersCorrectly(t *testing.T) { + ctx := context.Background() + + ir := &mockIndexReaderWithSeries{ + mockIndexReader: mockIndexReader{ + labelValues: map[string][]string{ + "pod": {"web-1", "worker-1", "web-2"}, + }, + }, + series: map[storage.SeriesRef]labels.Labels{ + 1: labels.FromStrings("pod", "web-1"), + 2: labels.FromStrings("pod", "worker-1"), + 3: labels.FromStrings("pod", "web-2"), + }, + } + + cache := &blocksPostingsForMatchersCache{ + postingsForMatchersFunc: func(_ context.Context, _ prom_tsdb.IndexReader, _ ...*labels.Matcher) (index.Postings, error) { + return index.NewListPostings([]storage.SeriesRef{1, 2, 3}), nil + }, + } + + selectMs := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"), + } + lazyMs := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "pod", "web.*"), + } + + refs, _, err := cache.fetchWithLazyMatchers(ctx, ir, selectMs, lazyMs) + assert.NoError(t, err) + assert.Equal(t, []storage.SeriesRef{1, 3}, refs) +} + +// --- Mocks --- + +type mockIndexReader struct { + prom_tsdb.IndexReader + labelValues map[string][]string + postingsCounts map[string]int // "name\xffvalue" -> count +} + +func (m *mockIndexReader) LabelValues(_ context.Context, name string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) { + return m.labelValues[name], nil +} + +func (m *mockIndexReader) Close() error { return nil } +func (m *mockIndexReader) Symbols() index.StringIter { return nil } +func (m *mockIndexReader) LabelNames(_ context.Context, _ ...*labels.Matcher) ([]string, error) { + return nil, nil +} +func (m *mockIndexReader) SortedLabelValues(_ context.Context, _ string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) { + return nil, nil +} +func (m *mockIndexReader) Postings(_ context.Context, name string, values ...string) (index.Postings, error) { + if m.postingsCounts != nil && len(values) == 1 { + key := name + "\xff" + values[0] + if n, ok := m.postingsCounts[key]; ok { + refs := make([]storage.SeriesRef, n) + for i := range refs { + refs[i] = storage.SeriesRef(i + 1) + } + return index.NewListPostings(refs), nil + } + } + return index.EmptyPostings(), nil +} +func (m *mockIndexReader) PostingsForLabelMatching(_ context.Context, _ string, _ func(string) bool) index.Postings { + return index.EmptyPostings() +} +func (m *mockIndexReader) PostingsForAllLabelValues(_ context.Context, _ string) index.Postings { + return index.EmptyPostings() +} +func (m *mockIndexReader) SortedPostings(p index.Postings) index.Postings { return p } +func (m *mockIndexReader) ShardedPostings(p index.Postings, _, _ uint64) index.Postings { return p } +func (m *mockIndexReader) Series(_ storage.SeriesRef, _ *labels.ScratchBuilder, _ *[]chunks.Meta) error { + return nil +} +func (m *mockIndexReader) LabelValueFor(_ context.Context, _ storage.SeriesRef, _ string) (string, error) { + return "", storage.ErrNotFound +} +func (m *mockIndexReader) LabelNamesFor(_ context.Context, _ index.Postings) ([]string, error) { + return nil, nil +} + +// mockIndexReaderWithSeries extends mockIndexReader with series label data +type mockIndexReaderWithSeries struct { + mockIndexReader + series map[storage.SeriesRef]labels.Labels +} + +func (m *mockIndexReaderWithSeries) LabelValueFor(_ context.Context, id storage.SeriesRef, label string) (string, error) { + lbls, ok := m.series[id] + if !ok { + return "", storage.ErrNotFound + } + v := lbls.Get(label) + if v == "" { + return "", storage.ErrNotFound + } + return v, nil +} + +func generateValues(prefix string, count int) []string { + vals := make([]string, count) + for i := range vals { + vals[i] = prefix + string(rune('0'+i%10)) + string(rune('0'+i/10%10)) + } + return vals +} + +// TestRegexCostClass verifies the complexity classifier we use to choose +// the cardinality:postings ratio gate. The classifier MUST agree with the +// fast-path semantics in postingsForMatcher: regexes that prometheus would +// short-circuit via setMatches or prefix-only matching are "simple" (cheap +// per-call); everything else (multi-substring contains, captures, character +// classes) is "complex" (expensive per-call, lazy iteration wins at lower +// cardinality:postings ratio). +func TestRegexCostClass(t *testing.T) { + cases := []struct { + name string + matcher *labels.Matcher + wantClass regexCost + }{ + { + "prefix only - cheap containsInOrder fast-reject", + labels.MustNewMatcher(labels.MatchRegexp, "x", "foo.*"), + regexCostSimple, + }, + { + "single contains - moderate", + labels.MustNewMatcher(labels.MatchRegexp, "x", ".*foo.*"), + regexCostSimple, + }, + { + "multi-substring contains - complex", + labels.MustNewMatcher(labels.MatchRegexp, "x", ".*foo.*bar.*"), + regexCostComplex, + }, + { + "capture group + literal - complex", + labels.MustNewMatcher(labels.MatchRegexp, "x", "(.+)-(.+)-(.+)"), + regexCostComplex, + }, + { + "alternation of contains - complex", + labels.MustNewMatcher(labels.MatchRegexp, "x", ".*a.*|.*b.*|.*c.*"), + regexCostComplex, + }, + { + "plain anchored regex with character class - complex", + labels.MustNewMatcher(labels.MatchRegexp, "x", "^foo[0-9]+$"), + regexCostComplex, + }, + { + "NotRegexp single contains - moderate", + labels.MustNewMatcher(labels.MatchNotRegexp, "x", ".*foo.*"), + regexCostSimple, + }, + { + "NotRegexp multi-substring - complex", + labels.MustNewMatcher(labels.MatchNotRegexp, "x", ".*a.*b.*"), + regexCostComplex, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := regexCostClass(tc.matcher) + assert.Equal(t, tc.wantClass, got, "%s: got %v want %v", tc.matcher.String(), got, tc.wantClass) + }) + } +} + +// TestSplitMatchersForHead_ZeroRatioUsesDefaults verifies that programmatic +// callers who construct lazyMatcherConfig without going through flag +// registration get the calibrated defaults (6 and 2), NOT a clamped 1 (which +// would silently re-introduce the original broken gate). See the regexCost +// docstring for the cost model. +func TestSplitMatchersForHead_ZeroRatioUsesDefaults(t *testing.T) { + ctx := context.Background() + ir := &mockIndexReader{ + labelValues: map[string][]string{ + "__name__": {"metric_a"}, + "pod": generateValues("pod-", 30000), + }, + postingsCounts: map[string]int{ + "__name__\xffmetric_a": 10000, + }, + } + matchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "metric_a"), + // simple regex: prefix-only. + labels.MustNewMatcher(labels.MatchRegexp, "pod", "foo.*"), + } + // Cardinality:postings ratio = 30000/10000 = 3. + // With ratio=1 (clamped), 3 > 1 → would defer. + // With default ratio=6, 3 > 6 is false → must NOT defer. + cfg := lazyMatcherConfig{ + MaxCardinality: 1000, + // SimpleRatio and ComplexRatio left zero — must use defaults. + } + _, lazyMs := splitMatchersForHeadWithConfig(ctx, ir, matchers, cfg, newLabelCardinalityCache()) + assert.Len(t, lazyMs, 0, "zero SimpleRatio must default to %d, not be clamped to 1", defaultSimpleCostRatio) +} + +// The original gate was `cardinality > minSelectPostings`, which incorrectly +// deferred regex evaluation when LabelValueFor (per-series) cost would exceed +// PostingsForLabelMatching (per-value) cost. The fixed gate is +// `cardinality > minSelectPostings * ratio` where ratio depends on regex cost +// class: +// - simple regex (prefix-only / single contains): ratio=6 (LabelValueFor is +// ~5x more expensive than a fast-path regex evaluation, +1 margin) +// - complex regex (multi-substring, capture, char class): ratio=2 (per-call +// regex cost is high enough that lazy wins at lower ratio) +func TestSplitMatchersForHead_CostRatio(t *testing.T) { + const ( + simpleRatio = 6 + complexRatio = 2 + ) + // Build an index where __name__=metric_a has 10000 series and pod has + // varying cardinalities to test the gate. + build := func(podCard int) *mockIndexReader { + return &mockIndexReader{ + labelValues: map[string][]string{ + "__name__": {"metric_a"}, + "pod": generateValues("pod-", podCard), + }, + postingsCounts: map[string]int{ + "__name__\xffmetric_a": 10000, + }, + } + } + + cases := []struct { + name string + podCard int + regex string + simpleRatio int + complexRatio int + wantLazyCount int + }{ + { + // 20K cardinality, 10K postings → ratio = 2 → for SIMPLE regex this + // is below the 6x threshold; should NOT defer (this is the + // `balanced_select` failure mode the original code triggered). + name: "simple regex 2x ratio - NOT deferred (cost gate)", + podCard: 20000, + regex: "foo.*", + simpleRatio: simpleRatio, + complexRatio: complexRatio, + wantLazyCount: 0, + }, + { + // 100K cardinality, 10K postings → ratio = 10 → above 6x; defer. + name: "simple regex 10x ratio - deferred", + podCard: 100000, + regex: "foo.*", + simpleRatio: simpleRatio, + complexRatio: complexRatio, + wantLazyCount: 1, + }, + { + // 25K cardinality, 10K postings → ratio = 2.5 → above complex + // threshold of 2; defer. + name: "complex regex 2.5x ratio - deferred", + podCard: 25000, + regex: ".*foo.*bar.*", + simpleRatio: simpleRatio, + complexRatio: complexRatio, + wantLazyCount: 1, + }, + { + // 15K cardinality, 10K postings → ratio = 1.5 → below complex + // threshold of 2; do NOT defer. + name: "complex regex 1.5x ratio - NOT deferred", + podCard: 15000, + regex: ".*foo.*bar.*", + simpleRatio: simpleRatio, + complexRatio: complexRatio, + wantLazyCount: 0, + }, + } + + ctx := context.Background() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ir := build(tc.podCard) + matchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, "__name__", "metric_a"), + labels.MustNewMatcher(labels.MatchRegexp, "pod", tc.regex), + } + cfg := lazyMatcherConfig{ + MaxCardinality: 1000, // engage threshold (< all podCard above) + SimpleRatio: tc.simpleRatio, + ComplexRatio: tc.complexRatio, + } + _, lazyMs := splitMatchersForHeadWithConfig(ctx, ir, matchers, cfg, newLabelCardinalityCache()) + assert.Len(t, lazyMs, tc.wantLazyCount, + "podCard=%d regex=%q: lazyCount mismatch", tc.podCard, tc.regex) + }) + } +} diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index e93e7c36a4..fb1bee6593 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -3045,6 +3045,24 @@ } }, "type": "object" + }, + "lazy_matcher_complex_cost_ratio": { + "default": 2, + "description": "Cardinality:postings ratio above which a complex regex (multi-substring, capture groups, character classes) is deferred. Lower = more aggressive deferral. Calibrated empirically; defaults to 2.", + "type": "number", + "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio" + }, + "lazy_matcher_max_cardinality": { + "default": 0, + "description": "Maximum label cardinality for deferring regex matchers on the head block. When a regex matcher targets a label with more unique values than this threshold, it is applied lazily during iteration instead of postings lookup. 0 disables.", + "type": "number", + "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality" + }, + "lazy_matcher_simple_cost_ratio": { + "default": 6, + "description": "Cardinality:postings ratio above which a simple regex (prefix-only, single contains) is deferred to lazy iteration. Lower = more aggressive deferral. Calibrated empirically; defaults to 6.", + "type": "number", + "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio" } }, "type": "object"