From 5f72154afd460852e917aee109bc396a451220aa Mon Sep 17 00:00:00 2001
From: Alan Protasio <approtas@amazon.com>
Date: Fri, 22 May 2026 17:26:44 +0000
Subject: [PATCH] perf(ingester): lazy regex evaluation on head postings cache
 miss

When the expanded postings cache misses on the head block, regex matchers
on high-cardinality labels (e.g. pod with 400K+ values) dominate query
cost. This PR defers expensive regex matchers to a lazy per-series
evaluation when a selective equality matcher already narrows the result
set significantly.

On cache miss, splitMatchersForHeadWithConfig splits matchers into:
- Selective matchers (equality, low-card regex) for postings lookup
- Lazy matchers (high-card regex) applied per-series via LabelValueFor

A cost-ratio gate decides when deferral is worthwhile:
- Simple regex (single contains, prefix): cardinality > selectivePostings * 6
- Complex regex (multi-substring, capture groups): cardinality > selectivePostings * 2

Label cardinality lookups are cached in an expirable LRU (60s TTL) to
avoid repeated LabelValues calls under load.

Benchmark (realistic pod names, 413K cardinality, 9K selective postings):
- Eager: 62ms, 29.8MB per query
- Lazy:  14ms, 12.6MB per query (4.5x faster, 58% less memory)

New flags (disabled by default with max-cardinality=0):
- blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality
- blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio
- blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio
---
 CHANGELOG.md                                  |   1 +
 docs/blocks-storage/querier.md                |  19 +
 docs/blocks-storage/store-gateway.md          |  19 +
 docs/configuration/config-file-reference.md   |  19 +
 integration/query_fuzz_test.go                | 274 ++++++++++
 .../ingester_lazy_posting_bench_test.go       | 288 ++++++++++
 pkg/storage/tsdb/expanded_postings_cache.go   | 107 +++-
 pkg/storage/tsdb/lazy_matchers.go             | 333 ++++++++++++
 pkg/storage/tsdb/lazy_matchers_test.go        | 499 ++++++++++++++++++
 schemas/cortex-config-schema.json             |  18 +
 10 files changed, 1574 insertions(+), 3 deletions(-)
 create mode 100644 pkg/ingester/ingester_lazy_posting_bench_test.go
 create mode 100644 pkg/storage/tsdb/lazy_matchers.go
 create mode 100644 pkg/storage/tsdb/lazy_matchers_test.go
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d5e891243..7d81fa4d17 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@
 * [ENHANCEMENT] Distributor: Add HMAC-SHA256 stream authentication for `PushStream` via `-distributor.sign-write-requests-keys`. #7475
 * [ENHANCEMENT] Instrument Ingester CPU profile with source for read APIs. #7494
 * [ENHANCEMENT] Ingester: Convert expanded postings cache from FIFO to LRU eviction to retain frequently-queried entries under memory pressure. #7510
+* [ENHANCEMENT] Ingester: Add lazy regex evaluation on head postings cache miss. Defers expensive regex matchers on high-cardinality labels to per-series filtering when a selective equality matcher already narrows the result set. Configured via `-blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality` (disabled by default). #7553
 * [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370
 * [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380
 * [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389
diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md
index 24bc6a4c3a..f9d7dfe0fc 100644
--- a/docs/blocks-storage/querier.md
+++ b/docs/blocks-storage/querier.md
@@ -1970,6 +1970,25 @@ blocks_storage:
         # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout
         [fetch_timeout: <duration> | default = 0s]
 
+      # Maximum label cardinality for deferring regex matchers on the head
+      # block. When a regex matcher targets a label with more unique values than
+      # this threshold, it is applied lazily during iteration instead of
+      # postings lookup. 0 disables.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality
+      [lazy_matcher_max_cardinality: <int> | default = 0]
+
+      # Cardinality:postings ratio above which a simple regex (prefix-only,
+      # single contains) is deferred to lazy iteration. Lower = more aggressive
+      # deferral. Calibrated empirically; defaults to 6.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio
+      [lazy_matcher_simple_cost_ratio: <int> | default = 6]
+
+      # Cardinality:postings ratio above which a complex regex (multi-substring,
+      # capture groups, character classes) is deferred. Lower = more aggressive
+      # deferral. Calibrated empirically; defaults to 2.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio
+      [lazy_matcher_complex_cost_ratio: <int> | default = 2]
+
   users_scanner:
     # Strategy to use to scan users. Supported values are: list, user_index.
     # CLI flag: -blocks-storage.users-scanner.strategy
diff --git a/docs/blocks-storage/store-gateway.md b/docs/blocks-storage/store-gateway.md
index 965a9089f2..9f3258e66a 100644
--- a/docs/blocks-storage/store-gateway.md
+++ b/docs/blocks-storage/store-gateway.md
@@ -2028,6 +2028,25 @@ blocks_storage:
         # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout
         [fetch_timeout: <duration> | default = 0s]
 
+      # Maximum label cardinality for deferring regex matchers on the head
+      # block. When a regex matcher targets a label with more unique values than
+      # this threshold, it is applied lazily during iteration instead of
+      # postings lookup. 0 disables.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality
+      [lazy_matcher_max_cardinality: <int> | default = 0]
+
+      # Cardinality:postings ratio above which a simple regex (prefix-only,
+      # single contains) is deferred to lazy iteration. Lower = more aggressive
+      # deferral. Calibrated empirically; defaults to 6.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio
+      [lazy_matcher_simple_cost_ratio: <int> | default = 6]
+
+      # Cardinality:postings ratio above which a complex regex (multi-substring,
+      # capture groups, character classes) is deferred. Lower = more aggressive
+      # deferral. Calibrated empirically; defaults to 2.
+      # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio
+      [lazy_matcher_complex_cost_ratio: <int> | default = 2]
+
   users_scanner:
     # Strategy to use to scan users. Supported values are: list, user_index.
     # CLI flag: -blocks-storage.users-scanner.strategy
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
index b71490c2d9..dc1ab9071d 100644
--- a/docs/configuration/config-file-reference.md
+++ b/docs/configuration/config-file-reference.md
@@ -2650,6 +2650,25 @@ tsdb:
       # CLI flag: -blocks-storage.expanded_postings_cache.block.fetch-timeout
       [fetch_timeout: <duration> | default = 0s]
 
+    # Maximum label cardinality for deferring regex matchers on the head block.
+    # When a regex matcher targets a label with more unique values than this
+    # threshold, it is applied lazily during iteration instead of postings
+    # lookup. 0 disables.
+    # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality
+    [lazy_matcher_max_cardinality: <int> | default = 0]
+
+    # Cardinality:postings ratio above which a simple regex (prefix-only, single
+    # contains) is deferred to lazy iteration. Lower = more aggressive deferral.
+    # Calibrated empirically; defaults to 6.
+    # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio
+    [lazy_matcher_simple_cost_ratio: <int> | default = 6]
+
+    # Cardinality:postings ratio above which a complex regex (multi-substring,
+    # capture groups, character classes) is deferred. Lower = more aggressive
+    # deferral. Calibrated empirically; defaults to 2.
+    # CLI flag: -blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio
+    [lazy_matcher_complex_cost_ratio: <int> | default = 2]
+
 users_scanner:
   # Strategy to use to scan users. Supported values are: list, user_index.
   # CLI flag: -blocks-storage.users-scanner.strategy
diff --git a/integration/query_fuzz_test.go b/integration/query_fuzz_test.go
index f735bd91d5..a011ae8c8e 100644
--- a/integration/query_fuzz_test.go
+++ b/integration/query_fuzz_test.go
@@ -662,6 +662,280 @@ func TestExpandedPostingsCacheFuzz(t *testing.T) {
 	}
 }
 
+// TestLazyMatchersFuzz fuzzes PromQL queries against two cortex instances with
+// identical data:
+//   - cortex-1: head expanded-postings cache enabled, lazy matcher DISABLED
+//     (the eager path - regex applied during postings lookup).
+//   - cortex-2: head expanded-postings cache enabled, lazy matcher ENABLED
+//     with aggressive thresholds (cardinality=1, both cost ratios=1) so the
+//     optimization fires on every regex matcher.
+//
+// The test verifies:
+//  1. Query results match between the two instances (correctness).
+//  2. The cortex_ingester_expanded_postings_lazy_matcher_queries_total counter
+//     is incremented on cortex-2 (the optimization actually triggers).
+func TestLazyMatchersFuzz(t *testing.T) {
+	s, err := e2e.NewScenario(networkName)
+	require.NoError(t, err)
+	defer s.Close()
+
+	// Start dependencies.
+	consul1 := e2edb.NewConsulWithName("consul1")
+	consul2 := e2edb.NewConsulWithName("consul2")
+	require.NoError(t, s.StartAndWaitReady(consul1, consul2))
+
+	baseFlags := mergeFlags(
+		AlertmanagerLocalFlags(),
+		map[string]string{
+			"-store.engine":                                         blocksStorageEngine,
+			"-blocks-storage.backend":                               "filesystem",
+			"-blocks-storage.tsdb.head-compaction-interval":         "4m",
+			"-blocks-storage.tsdb.block-ranges-period":              "2h",
+			"-blocks-storage.tsdb.ship-interval":                    "1h",
+			"-blocks-storage.bucket-store.sync-interval":            "15m",
+			"-blocks-storage.tsdb.retention-period":                 "2h",
+			"-blocks-storage.bucket-store.index-cache.backend":      tsdb.IndexCacheBackendInMemory,
+			"-blocks-storage.bucket-store.bucket-index.enabled":     "true",
+			"-blocks-storage.expanded_postings_cache.head.enabled":  "true",
+			"-blocks-storage.expanded_postings_cache.block.enabled": "true",
+			"-distributor.replication-factor":                       "1",
+			"-store-gateway.sharding-enabled":                       "false",
+			"-alertmanager.web.external-url":                        "http://localhost/alertmanager",
+			// The alertmanager initializes a memberlist gossip ring that auto-
+			// detects a private RFC1918 IP. On Docker networks where containers
+			// get non-private IPs (e.g. the 240.0.0.0/4 reserved range), this
+			// detection hard-fails. Setting an explicit advertise address skips
+			// the autodetection — the value is unused since we don't enable HA
+			// peers, but presence of the flag is enough.
+			"-alertmanager.cluster.advertise-address": "127.0.0.1:9094",
+		},
+	)
+
+	// cortex-1: eager path. Lazy matcher disabled (default).
+	flags1 := mergeFlags(baseFlags, map[string]string{
+		"-ring.store":                        "consul",
+		"-consul.hostname":                   consul1.NetworkHTTPEndpoint(),
+		"-ingester.matchers-cache-max-items": "10000",
+	})
+
+	// cortex-2: lazy path. Aggressive thresholds force the optimization to
+	// fire on essentially every regex matcher, so we exercise the lazy code
+	// path repeatedly for correctness verification.
+	flags2 := mergeFlags(baseFlags, map[string]string{
+		"-ring.store":                        "consul",
+		"-consul.hostname":                   consul2.NetworkHTTPEndpoint(),
+		"-ingester.matchers-cache-max-items": "10000",
+		"-blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality":    "1",
+		"-blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio":  "1",
+		"-blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio": "1",
+	})
+
+	require.NoError(t, writeFileToSharedDir(s, "alertmanager_configs", []byte{}))
+
+	path1 := path.Join(s.SharedDir(), "cortex-1")
+	path2 := path.Join(s.SharedDir(), "cortex-2")
+	flags1 = mergeFlags(flags1, map[string]string{"-blocks-storage.filesystem.dir": path1})
+	flags2 = mergeFlags(flags2, map[string]string{"-blocks-storage.filesystem.dir": path2})
+
+	// Both instances use the local build.
+	cortex1 := e2ecortex.NewSingleBinary("cortex-1", flags1, "")
+	cortex2 := e2ecortex.NewSingleBinary("cortex-2", flags2, "")
+	require.NoError(t, s.StartAndWaitReady(cortex1, cortex2))
+
+	require.NoError(t, cortex1.WaitSumMetrics(e2e.Equals(float64(512)), "cortex_ring_tokens_total"))
+	require.NoError(t, cortex2.WaitSumMetrics(e2e.Equals(float64(512)), "cortex_ring_tokens_total"))
+
+	c1, err := e2ecortex.NewClient(cortex1.HTTPEndpoint(), cortex1.HTTPEndpoint(), "", "", "user-1")
+	require.NoError(t, err)
+	c2, err := e2ecortex.NewClient(cortex2.HTTPEndpoint(), cortex2.HTTPEndpoint(), "", "", "user-1")
+	require.NoError(t, err)
+
+	now := time.Now()
+	start := now.Add(-24 * time.Hour)
+	scrapeInterval := 30 * time.Second
+
+	// Build a fixture with multiple labels, including a high-cardinality
+	// "pod"-style label so regex matchers from promqlsmith actually exercise
+	// the deferral path. With lazy-matcher-max-cardinality=1, any label with
+	// >1 unique value is eligible.
+	numSeries := 10
+	numberOfLabelsPerSeries := 5
+	numSamples := 10
+	ss := make([]prompb.TimeSeries, numSeries*numberOfLabelsPerSeries)
+	lbls := make([]labels.Labels, numSeries*numberOfLabelsPerSeries)
+
+	for i := 0; i < numSeries; i++ {
+		for j := 0; j < numberOfLabelsPerSeries; j++ {
+			series := e2e.GenerateSeriesWithSamples(
+				fmt.Sprintf("test_series_%d", i),
+				start,
+				scrapeInterval,
+				i*numSamples,
+				numSamples,
+				prompb.Label{Name: "test_label", Value: fmt.Sprintf("test_label_value_%d", j)},
+				prompb.Label{Name: "pod", Value: fmt.Sprintf("test_pod_%d_%d", i, j)},
+			)
+			ss[i*numberOfLabelsPerSeries+j] = series
+
+			builder := labels.NewBuilder(labels.EmptyLabels())
+			for _, lbl := range series.Labels {
+				builder.Set(lbl.Name, lbl.Value)
+			}
+			lbls[i*numberOfLabelsPerSeries+j] = builder.Labels()
+		}
+	}
+
+	for _, client := range []*e2ecortex.Client{c1, c2} {
+		res, err := client.Push(ss)
+		require.NoError(t, err)
+		require.Equal(t, 200, res.StatusCode)
+	}
+
+	rnd := rand.New(rand.NewSource(now.Unix()))
+	opts := []promqlsmith.Option{
+		promqlsmith.WithEnabledAggrs(enabledAggrs),
+	}
+	ps := promqlsmith.New(rnd, lbls, opts...)
+
+	// Regex patterns that exercise different cost classes in the lazy matcher gate.
+	// Each pattern matches a SUBSET of pods (not all), so both =~ and !~ queries
+	// return non-empty results, verifying correctness with actual data.
+	regexPatterns := []string{
+		".*_0_.*",                    // single contains (simple) — 5/50 pods
+		".*_[0-4]_[0-2]",             // character class (complex) — 15/50 pods
+		"test_pod_[5-9]_.*",          // prefix + class (complex) — 25/50 pods
+		".*pod_3.*",                  // single contains (simple) — 5/50 pods
+		"(test_pod_1|test_pod_2)_.*", // alternation (complex) — 10/50 pods
+	}
+
+	testRun := 300
+	queries := make([]string, 0, testRun*3) // up to 3 queries are appended per iteration
+	matchers := make([]string, 0, testRun)
+	for i := 0; i < testRun; i++ {
+		expr := ps.WalkRangeQuery()
+		if !isValidQuery(expr, true) {
+			continue
+		}
+		queries = append(queries, expr.Pretty(0))
+
+		// Each matcher set includes a __name__= anchor + a regex on pod,
+		// guaranteeing the lazy matcher optimization fires on every cache miss.
+		regex := regexPatterns[i%len(regexPatterns)]
+		matchers = append(matchers, storepb.PromMatchersToString(
+			append(
+				ps.WalkSelectors(),
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", fmt.Sprintf("test_series_%d", i%numSeries)),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", regex),
+			)...))
+
+		// Also generate a direct PromQL query with the regex so the instant/range
+		// query path exercises the lazy matcher too. Include iteration index in
+		// a != matcher to force unique cache keys (cache miss on every query).
+		queries = append(queries, fmt.Sprintf(`test_series_%d{pod=~"%s",test_label!="iter_%d"}`, i%numSeries, regex, i))
+		// Also test negative regex (!~) to exercise that code path.
+		queries = append(queries, fmt.Sprintf(`test_series_%d{pod!~"%s",test_label!="iter_%d_neg"}`, i%numSeries, regex, i))
+	}
+
+	type testCase struct {
+		query        string
+		qt           string
+		res1, res2   model.Value
+		sres1, sres2 []model.LabelSet
+		err1, err2   error
+	}
+
+	cases := make([]*testCase, 0, len(queries)*2+len(matchers))
+
+	// Data spans [start, start + (numSamples-1)*scrapeInterval]. Constrain
+	// fuzzed timestamps to this window so queries actually hit the head block.
+	dataEnd := start.Add(scrapeInterval * time.Duration(numSamples-1))
+	dataWindowMs := dataEnd.Sub(start).Milliseconds()
+
+	for _, query := range queries {
+		fuzzyTime := time.Duration(rand.Int63n(dataWindowMs))
+		queryEnd := start.Add(fuzzyTime * time.Millisecond)
+		res1, err1 := c1.Query(query, queryEnd)
+		res2, err2 := c2.Query(query, queryEnd)
+		cases = append(cases, &testCase{
+			query: query, qt: "instant",
+			res1: res1, res2: res2, err1: err1, err2: err2,
+		})
+		res1, err1 = c1.QueryRange(query, start, queryEnd, scrapeInterval)
+		res2, err2 = c2.QueryRange(query, start, queryEnd, scrapeInterval)
+		cases = append(cases, &testCase{
+			query: query, qt: "range query",
+			res1: res1, res2: res2, err1: err1, err2: err2,
+		})
+	}
+
+	for _, m := range matchers {
+		fuzzyTime := time.Duration(rand.Int63n(dataWindowMs))
+		queryEnd := start.Add(fuzzyTime * time.Millisecond)
+		res1, err := c1.Series([]string{m}, start, queryEnd)
+		require.NoError(t, err)
+		res2, err := c2.Series([]string{m}, start, queryEnd)
+		require.NoError(t, err)
+		cases = append(cases, &testCase{
+			query: m, qt: "get series",
+			sres1: res1, sres2: res2,
+		})
+	}
+
+	failures := 0
+	for i, tc := range cases {
+		if tc.err1 != nil || tc.err2 != nil {
+			if !cmp.Equal(tc.err1, tc.err2) {
+				t.Logf("case %d error mismatch.\n%s: %s\nerr1: %v\nerr2: %v\n", i, tc.qt, tc.query, tc.err1, tc.err2)
+				failures++
+			}
+		} else if shouldUseSampleNumComparer(tc.query) {
+			if !cmp.Equal(tc.res1, tc.res2, sampleNumComparer) {
+				t.Logf("case %d # of samples mismatch.\n%s: %s\nres1: %s\nres2: %s\n", i, tc.qt, tc.query, tc.res1.String(), tc.res2.String())
+				failures++
+			}
+		} else if !cmp.Equal(tc.res1, tc.res2, comparer) {
+			t.Logf("case %d results mismatch.\n%s: %s\nres1: %s\nres2: %s\n", i, tc.qt, tc.query, tc.res1.String(), tc.res2.String())
+			failures++
+		} else if !cmp.Equal(tc.sres1, tc.sres2, labelSetsComparer) {
+			t.Logf("case %d series results mismatch.\n%s: %s\nsres1: %s\nsres2: %s\n", i, tc.qt, tc.query, tc.sres1, tc.sres2)
+			failures++
+		}
+	}
+	if failures > 0 {
+		require.Failf(t, "finished lazy matcher fuzzing tests", "%d test cases failed", failures)
+	}
+
+	// Verify the lazy-matcher optimization was actually triggered on cortex-2.
+	// If the gate is misconfigured or the test fixture doesn't exercise the
+	// path, this guards against silent regressions where the optimization
+	// becomes a no-op.
+
+	// Diagnostic: print related counters before the assertion so failures
+	// can be debugged from the test output.
+	for _, m := range []string{
+		"cortex_ingester_queries",
+		"cortex_ingester_queried_series",
+		"cortex_ingester_queried_chunks",
+		"cortex_ingester_expanded_postings_cache_requests_total",
+		"cortex_ingester_expanded_postings_cache_hits_total",
+		"cortex_ingester_expanded_postings_non_cacheable_queries_total",
+		"cortex_ingester_expanded_postings_lazy_matcher_queries_total",
+	} {
+		v, sumErr := cortex2.SumMetrics([]string{m})
+		t.Logf("cortex-2 %s = %v (err: %v)", m, v, sumErr)
+	}
+
+	require.NoError(t, cortex2.WaitSumMetrics(e2e.Greater(0),
+		"cortex_ingester_expanded_postings_lazy_matcher_queries_total"))
+
+	// Sanity check: cortex-1 (eager) should NEVER increment this counter.
+	c1Lazy, err := cortex1.SumMetrics([]string{"cortex_ingester_expanded_postings_lazy_matcher_queries_total"})
+	if err == nil && len(c1Lazy) > 0 {
+		require.Equal(t, float64(0), c1Lazy[0],
+			"cortex-1 has lazy matcher disabled but the metric is non-zero")
+	}
+}
+
 func TestVerticalShardingFuzz(t *testing.T) {
 	s, err := e2e.NewScenario(networkName)
 	require.NoError(t, err)
diff --git a/pkg/ingester/ingester_lazy_posting_bench_test.go b/pkg/ingester/ingester_lazy_posting_bench_test.go
new file mode 100644
index 0000000000..fee2e0f770
--- /dev/null
+++ b/pkg/ingester/ingester_lazy_posting_bench_test.go
@@ -0,0 +1,288 @@
+package ingester
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/model"
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/stretchr/testify/require"
+	"github.com/thanos-io/thanos/pkg/store/storepb"
+	"github.com/weaveworks/common/user"
+
+	"github.com/cortexproject/cortex/pkg/cortexpb"
+	"github.com/cortexproject/cortex/pkg/ring"
+	"github.com/cortexproject/cortex/pkg/util/services"
+	"github.com/cortexproject/cortex/pkg/util/test"
+)
+
+// BenchmarkIngester_LazyPosting exercises the lazy-matcher-max-cardinality
+// optimization on the head block cache miss path.
+//
+// Scenarios (each query is anchored on __name__="metric_0"):
+//   - "small_select_huge_regex_label": __name__ selects 2% of series, regex
+//     targets a 100K-cardinality label. This is the primary case the lazy
+//     matcher optimization is designed for; it should be a clear win.
+//   - "balanced_select_huge_regex_label": __name__ selects 50% of series, so
+//     the lazy per-series filter runs on a large set; could go either way.
+//   - "small_select_low_card_regex": regex label has only 100 distinct values,
+//     below both lazy thresholds; the optimization should not engage.
+//   - "small_select_complex_regex": multi-substring regex (.*lit.*lit.*) on the
+//     100K-cardinality label; adds the per-series cost of a complex regex.
+//   - "small_select_capture_regex": alternation in a capture group, (a|b).*.
+//
+// Each scenario runs under four configs: no_cache (no cache, eager regex),
+// cache_eager (cache on, lazy off), and cache_lazy_10k / cache_lazy_1k (cache
+// on, lazy-matcher-max-cardinality of 10000 / 1000).
+//
+// A cache miss is forced by calling ExpireSeries before each iteration.
+func BenchmarkIngester_LazyPosting(b *testing.B) {
+	scenarios := []struct {
+		name string
+		// Series cardinality knobs
+		nameValues          int // number of distinct __name__ values
+		seriesPerName       int // series per metric name (drives __name__= selectivity)
+		podCardinality      int // distinct pod values across all series
+		podSharedAcrossName bool
+		// Query: matchers are sent on every run; expectMatches makes the warm-up
+		// run assert that at least one series is returned.
+		matchers      []*labels.Matcher
+		expectMatches bool
+	}{
+		{
+			name:                "small_select_huge_regex_label",
+			nameValues:          50,   // 50 distinct metric names
+			seriesPerName:       2000, // each metric has 2000 series → __name__=metric_0 selects 2K of 100K
+			podCardinality:      100000,
+			podSharedAcrossName: false,
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"),
+			},
+			expectMatches: true,
+		},
+		{
+			name:                "balanced_select_huge_regex_label",
+			nameValues:          2,     // only 2 metric names
+			seriesPerName:       50000, // each has 50K series → __name__= selects 50K of 100K
+			podCardinality:      100000,
+			podSharedAcrossName: false,
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"),
+			},
+			expectMatches: true,
+		},
+		{
+			name:                "small_select_low_card_regex",
+			nameValues:          50,
+			seriesPerName:       2000,
+			podCardinality:      100, // below both lazy thresholds (1k, 10k)
+			podSharedAcrossName: true,
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"),
+			},
+			expectMatches: true,
+		},
+		{
+			name:                "small_select_complex_regex",
+			nameValues:          50,
+			seriesPerName:       2000,
+			podCardinality:      100000,
+			podSharedAcrossName: false,
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+				// .*foo.*bar.* — multi-substring contains; complex per-call
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod.*1.*"),
+			},
+			expectMatches: true,
+		},
+		{
+			name:                "small_select_capture_regex",
+			nameValues:          50,
+			seriesPerName:       2000,
+			podCardinality:      100000,
+			podSharedAcrossName: false,
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+				// (foo|bar).* — capture group alternation, common in envoy_authority
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", "(pod-1|pod-2).*"),
+			},
+			expectMatches: true,
+		},
+	}
+
+	configs := []struct {
+		name            string
+		cacheEnabled    bool
+		lazyCardinality int
+	}{
+		{name: "no_cache", cacheEnabled: false, lazyCardinality: 0},
+		{name: "cache_eager", cacheEnabled: true, lazyCardinality: 0},
+		{name: "cache_lazy_10k", cacheEnabled: true, lazyCardinality: 10000},
+		{name: "cache_lazy_1k", cacheEnabled: true, lazyCardinality: 1000},
+	}
+
+	for _, sc := range scenarios {
+		b.Run(sc.name, func(b *testing.B) {
+			for _, cfg := range configs {
+				b.Run(cfg.name, func(b *testing.B) {
+					runLazyPostingBenchmark(b, sc.nameValues, sc.seriesPerName, sc.podCardinality,
+						sc.podSharedAcrossName, cfg.cacheEnabled, cfg.lazyCardinality, sc.matchers,
+						sc.expectMatches)
+				})
+			}
+		})
+	}
+}
+
+// BenchmarkIngester_LazyPosting_CacheHit benchmarks the cache-hit path with the
+// lazy matcher off ("eager") and on ("lazy"); lazy must not slow down hits.
+func BenchmarkIngester_LazyPosting_CacheHit(b *testing.B) {
+	matchers := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "metric_0"),
+		labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*pod-1.*"),
+	}
+
+	// One sub-benchmark per lazy-matcher cardinality threshold (0 disables it).
+	for _, variant := range []struct {
+		name            string
+		lazyCardinality int
+	}{{"eager", 0}, {"lazy", 10000}} {
+		b.Run(variant.name, func(b *testing.B) {
+			runCacheHitBenchmark(b, 50, 2000, 100000, matchers, variant.lazyCardinality)
+		})
+	}
+}
+
+func runLazyPostingBenchmark(
+	b *testing.B,
+	nameValues, seriesPerName, podCardinality int,
+	podSharedAcrossName bool,
+	cacheEnabled bool,
+	lazyCardinality int,
+	matchers []*labels.Matcher,
+	expectMatches bool,
+) {
+	b.Helper()
+	const userID = "test"
+	cfg := defaultIngesterTestConfig(b)
+	if cacheEnabled {
+		postings := &cfg.BlocksStorageConfig.TSDB.PostingsCache
+		postings.Head.Enabled = true
+		postings.Head.MaxBytes = 100 * 1024 * 1024
+		postings.Head.Ttl = time.Hour
+		postings.LazyMatcherMaxCardinality = lazyCardinality
+	}
+
+	ing, err := prepareIngesterWithBlocksStorage(b, cfg, prometheus.NewRegistry())
+	require.NoError(b, err)
+	require.NoError(b, services.StartAndAwaitRunning(context.Background(), ing))
+	defer func() { _ = services.StopAndAwaitTerminated(context.Background(), ing) }()
+	test.Poll(b, time.Second, ring.ACTIVE, func() any { return ing.lifecycler.GetState() })
+
+	ctx := user.InjectOrgID(context.Background(), userID)
+	pushSeries(b, ing, ctx, nameValues, seriesPerName, podCardinality, podSharedAcrossName)
+
+	userDB, err := ing.getTSDB(userID)
+	require.NoError(b, err)
+	require.NotNil(b, userDB)
+
+	stream := &mockQueryStreamServer{ctx: ctx}
+	shardMatcher := (&storepb.ShardInfo{TotalShards: 0}).Matcher(nil)
+
+	// Untimed warm-up query; doubles as the scenario sanity check.
+	warmSeries, _, _, _, err := ing.queryStreamChunks(ctx, userID, userDB, 0, 5000, matchers, shardMatcher, stream)
+	require.NoError(b, err)
+	if expectMatches {
+		require.Greater(b, warmSeries, 0, "scenario must produce matches")
+	}
+	stream.series = stream.series[:0]
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		// Expire the cached entry for the queried metric inside the timed loop;
+		// this is intended to make every iteration a head postings cache miss.
+		// Skipped when the cache is disabled or the DB has no posting cache.
+		if cacheEnabled && userDB.postingCache != nil {
+			userDB.postingCache.ExpireSeries(labels.FromStrings(model.MetricNameLabel, "metric_0"))
+		}
+		_, _, _, _, err := ing.queryStreamChunks(ctx, userID, userDB, 0, 5000, matchers, shardMatcher, stream)
+		require.NoError(b, err)
+		stream.series = stream.series[:0]
+	}
+}
+
+// runCacheHitBenchmark primes the head postings cache with one untimed query
+// and then times repeated runs of that same query without expiring the cache.
+func runCacheHitBenchmark(b *testing.B, nameValues, seriesPerName, podCardinality int, matchers []*labels.Matcher, lazyCardinality int) {
+	b.Helper() // report failures at the calling sub-benchmark, like the sibling helpers
+	const userID = "test"
+	cfg := defaultIngesterTestConfig(b)
+	cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled = true
+	cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.MaxBytes = 100 * 1024 * 1024
+	cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Ttl = time.Hour
+	cfg.BlocksStorageConfig.TSDB.PostingsCache.LazyMatcherMaxCardinality = lazyCardinality
+
+	i, err := prepareIngesterWithBlocksStorage(b, cfg, prometheus.NewRegistry())
+	require.NoError(b, err)
+	require.NoError(b, services.StartAndAwaitRunning(context.Background(), i))
+	defer func() { _ = services.StopAndAwaitTerminated(context.Background(), i) }()
+	test.Poll(b, time.Second, ring.ACTIVE, func() any { return i.lifecycler.GetState() })
+	ctx := user.InjectOrgID(context.Background(), userID)
+	pushSeries(b, i, ctx, nameValues, seriesPerName, podCardinality, false)
+	db, err := i.getTSDB(userID)
+	require.NoError(b, err)
+
+	// Prime the cache
+	mockStream := &mockQueryStreamServer{ctx: ctx}
+	sm := (&storepb.ShardInfo{TotalShards: 0}).Matcher(nil)
+	_, _, _, _, err = i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream)
+	require.NoError(b, err)
+	mockStream.series = mockStream.series[:0]
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		_, _, _, _, err := i.queryStreamChunks(ctx, userID, db, 0, 5000, matchers, sm, mockStream)
+		require.NoError(b, err)
+		mockStream.series = mockStream.series[:0]
+	}
+}
+
+// pushSeries creates `nameValues` distinct __name__ values, each with `seriesPerName`
+// series. Each series gets a unique pod label drawn from `podCardinality` distinct values.
+// When podSharedAcrossName is true, the same pod values are reused across name values
+// (otherwise pods are distinct per name to inflate label cardinality).
+//
+// Pushes one series at a time using writeRequestSingleSeries, which is the
+// proven-working pattern in the existing benchmarks. Slow but reliable.
+func pushSeries(b *testing.B, i *Ingester, ctx context.Context, nameValues, seriesPerName, podCardinality int, podSharedAcrossName bool) {
+	b.Helper()
+	sample := []cortexpb.Sample{{Value: 1, TimestampMs: 1}}
+	for n := range nameValues {
+		metric := fmt.Sprintf("metric_%d", n)
+		for s := range seriesPerName {
+			var podIdx int
+			if podSharedAcrossName {
+				podIdx = s % podCardinality
+			} else {
+				podIdx = (n*seriesPerName + s) % podCardinality
+			}
+			lbls := labels.FromStrings(
+				model.MetricNameLabel, metric,
+				"pod", "pod-"+strconv.Itoa(podIdx),
+				"region", "region-"+strconv.Itoa(s%10),
+				"job", "job-"+strconv.Itoa(s%20),
+			)
+			_, err := i.Push(ctx, writeRequestSingleSeries(lbls, sample))
+			require.NoError(b, err)
+		}
+	}
+}
diff --git a/pkg/storage/tsdb/expanded_postings_cache.go b/pkg/storage/tsdb/expanded_postings_cache.go
index a241607c8a..1ae001a197 100644
--- a/pkg/storage/tsdb/expanded_postings_cache.go
+++ b/pkg/storage/tsdb/expanded_postings_cache.go
@@ -10,6 +10,7 @@ import (
 	"sync"
 	"time"
 
+	"github.com/hashicorp/golang-lru/v2/expirable"
 	"github.com/oklog/ulid/v2"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
@@ -42,6 +43,7 @@ type ExpandedPostingsCacheMetrics struct {
 	CacheEvicts         *prometheus.CounterVec
 	CacheMiss           *prometheus.CounterVec
 	NonCacheableQueries *prometheus.CounterVec
+	LazyMatcherQueries  prometheus.Counter
 }
 
 func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetrics {
@@ -66,6 +68,10 @@ func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetri
 			Name: "cortex_ingester_expanded_postings_non_cacheable_queries_total",
 			Help: "Total number of non cacheable queries.",
 		}, []string{"cache"}),
+		LazyMatcherQueries: promauto.With(r).NewCounter(prometheus.CounterOpts{
+			Name: "cortex_ingester_expanded_postings_lazy_matcher_queries_total",
+			Help: "Total number of queries that used lazy matcher evaluation on cache miss.",
+		}),
 	}
 }
 
@@ -73,6 +79,25 @@ type TSDBPostingsCacheConfig struct {
 	Head   PostingsCacheConfig `yaml:"head" doc:"description=If enabled, ingesters will cache expanded postings for the head block. Only queries with with an equal matcher for metric __name__ are cached."`
 	Blocks PostingsCacheConfig `yaml:"blocks" doc:"description=If enabled, ingesters will cache expanded postings for the compacted blocks. The cache is shared between all blocks."`
 
+	// LazyMatcherMaxCardinality configures the maximum label cardinality threshold for
+	// deferring regex matchers on the head block. When a regex matcher targets a label with
+	// more unique values than this threshold, the matcher is applied lazily during series
+	// iteration instead of during postings lookup. This avoids expensive regex scans on
+	// high-cardinality labels when the head postings cache misses. 0 disables this optimization.
+	LazyMatcherMaxCardinality int `yaml:"lazy_matcher_max_cardinality"`
+
+	// LazyMatcherSimpleCostRatio is the cardinality:postings ratio above which a
+	// regex matcher with a simple per-call cost (prefix-only, single contains,
+	// single suffix) is deferred to lazy iteration. Tuned empirically — see
+	// regexCostClass for derivation. Defaults to 6 when unset.
+	LazyMatcherSimpleCostRatio int `yaml:"lazy_matcher_simple_cost_ratio"`
+
+	// LazyMatcherComplexCostRatio is the cardinality:postings ratio above which a
+	// regex matcher with a complex per-call cost (multi-substring contains,
+	// capture groups with literals, character classes) is deferred. Defaults to 2
+	// when unset.
+	LazyMatcherComplexCostRatio int `yaml:"lazy_matcher_complex_cost_ratio"`
+
 	// The configurations below are used only for testing purpose
 	PostingsForMatchers func(ctx context.Context, ix tsdb.IndexReader, ms ...*labels.Matcher) (index.Postings, error) `yaml:"-"`
 	SeedSize            int                                                                                           `yaml:"-"`
@@ -89,6 +114,9 @@ type PostingsCacheConfig struct {
 func (cfg *TSDBPostingsCacheConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
 	cfg.Head.RegisterFlagsWithPrefix(prefix, "head", f)
 	cfg.Blocks.RegisterFlagsWithPrefix(prefix, "block", f)
+	f.IntVar(&cfg.LazyMatcherMaxCardinality, prefix+"expanded_postings_cache.head.lazy-matcher-max-cardinality", 0, "Maximum label cardinality for deferring regex matchers on the head block. When a regex matcher targets a label with more unique values than this threshold, it is applied lazily during iteration instead of postings lookup. 0 disables.")
+	f.IntVar(&cfg.LazyMatcherSimpleCostRatio, prefix+"expanded_postings_cache.head.lazy-matcher-simple-cost-ratio", defaultSimpleCostRatio, "Cardinality:postings ratio above which a simple regex (prefix-only, single contains) is deferred to lazy iteration. Lower = more aggressive deferral. Calibrated empirically; defaults to 6.")
+	f.IntVar(&cfg.LazyMatcherComplexCostRatio, prefix+"expanded_postings_cache.head.lazy-matcher-complex-cost-ratio", defaultComplexCostRatio, "Cardinality:postings ratio above which a complex regex (multi-substring, capture groups, character classes) is deferred. Lower = more aggressive deferral. Calibrated empirically; defaults to 2.")
 }
 
 // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet
@@ -139,6 +167,9 @@ type blocksPostingsForMatchersCache struct {
 	postingsForMatchersFunc func(ctx context.Context, ix tsdb.IndexReader, ms ...*labels.Matcher) (index.Postings, error)
 	timeNow                 func() time.Time
 
+	lazyMatcherCfg        lazyMatcherConfig
+	labelCardinalityCache *expirable.LRU[string, int]
+
 	metrics    *ExpandedPostingsCacheMetrics
 	seedByHash *seedByHash
 }
@@ -162,9 +193,15 @@ func newBlocksPostingsForMatchersCache(userId string, cfg TSDBPostingsCacheConfi
 		blocksCache:             newLruCache[[]storage.SeriesRef](cfg.Blocks, "block", metrics, cfg.timeNow),
 		postingsForMatchersFunc: cfg.PostingsForMatchers,
 		timeNow:                 cfg.timeNow,
-		metrics:                 metrics,
-		seedByHash:              seedByHash,
-		userId:                  userId,
+		lazyMatcherCfg: lazyMatcherConfig{
+			MaxCardinality: cfg.LazyMatcherMaxCardinality,
+			SimpleRatio:    cfg.LazyMatcherSimpleCostRatio,
+			ComplexRatio:   cfg.LazyMatcherComplexCostRatio,
+		},
+		labelCardinalityCache: newLabelCardinalityCache(),
+		metrics:               metrics,
+		seedByHash:            seedByHash,
+		userId:                userId,
 	}
 }
 
@@ -232,6 +269,16 @@ func (c *blocksPostingsForMatchersCache) fetchPostings(blockID ulid.ULID, ix tsd
 			defer cancel()
 		}
 
+		// For head blocks, try to avoid expensive regex scans by splitting matchers:
+		// resolve postings with selective matchers only, then filter by regex lazily.
+		if isHeadBlock(blockID) && c.lazyMatcherCfg.MaxCardinality > 0 {
+			selectMs, lazyMs := splitMatchersForHeadWithConfig(fetchCtx, ix, ms, c.lazyMatcherCfg, c.labelCardinalityCache)
+			if len(lazyMs) > 0 {
+				c.metrics.LazyMatcherQueries.Inc()
+				return c.fetchWithLazyMatchers(fetchCtx, ix, selectMs, lazyMs)
+			}
+		}
+
 		postings, err := c.postingsForMatchersFunc(fetchCtx, ix, ms...)
 
 		if err == nil {
@@ -265,6 +312,60 @@ func (c *blocksPostingsForMatchersCache) result(ce *cacheEntryPromise[[]storage.
 	}
 }
 
+// fetchWithLazyMatchers resolves postings from selectMs only, then keeps the series
+// whose label values satisfy every lazyMs matcher. Values are read via LabelValueFor;
+// a lookup error is treated as an absent label (""). It returns the surviving refs and
+// their size in bytes, or ctx's error when ctx is done while the series are filtered.
+func (c *blocksPostingsForMatchersCache) fetchWithLazyMatchers(ctx context.Context, ix tsdb.IndexReader, selectMs, lazyMs []*labels.Matcher) ([]storage.SeriesRef, int64, error) {
+	postings, err := c.postingsForMatchersFunc(ctx, ix, selectMs...)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	ids, err := index.ExpandPostings(postings)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// Per-matcher memo (label value -> verdict): each regex runs once per distinct value.
+	caches := make([]map[string]bool, len(lazyMs))
+	for i := range lazyMs {
+		caches[i] = make(map[string]bool)
+	}
+
+	filtered := ids[:0] // filter in place: writes never overtake the read cursor
+	for n, id := range ids {
+		// Poll ctx periodically so cancellation or a deadline also bounds this scan.
+		if n%128 == 0 && ctx.Err() != nil {
+			return nil, 0, ctx.Err()
+		}
+		matches := true
+		for i, m := range lazyMs {
+			val, err := ix.LabelValueFor(ctx, id, m.Name)
+			if err != nil {
+				val = "" // absent label: the matcher sees the empty string
+			}
+			result, seen := caches[i][val]
+			if !seen {
+				result = m.Matches(val)
+				caches[i][val] = result
+			}
+			if !result {
+				matches = false
+				break
+			}
+		}
+		if matches {
+			filtered = append(filtered, id)
+		}
+	}
+	// filtered aliases ids: after drops, copy to an exact-size slice so the reported size matches what is retained.
+	if len(filtered) < len(ids) {
+		filtered = append(make([]storage.SeriesRef, 0, len(filtered)), filtered...)
+	}
+	return filtered, int64(len(filtered) * 8), nil
+}
+
 func (c *blocksPostingsForMatchersCache) getSeedForMetricName(metricName string) string {
 	return c.seedByHash.getSeed(c.userId, metricName)
 }
diff --git a/pkg/storage/tsdb/lazy_matchers.go b/pkg/storage/tsdb/lazy_matchers.go
new file mode 100644
index 0000000000..6b50ff98be
--- /dev/null
+++ b/pkg/storage/tsdb/lazy_matchers.go
@@ -0,0 +1,333 @@
+package tsdb
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	"github.com/hashicorp/golang-lru/v2/expirable"
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/storage"
+	prom_tsdb "github.com/prometheus/prometheus/tsdb"
+	"github.com/prometheus/prometheus/tsdb/index"
+)
+
+// regexCost classifies how expensive per-call evaluation of a regex matcher
+// is, relative to a single LabelValueFor call. We use this to choose the
+// cardinality:postings ratio gate that decides whether to defer a regex
+// matcher to lazy iteration on the head block.
+//
+// Calibration is empirical, from BenchmarkIngester_LazyPosting:
+//   - LabelValueFor on the head ≈ 1µs per call (lock + map lookup + label parse)
+//   - simple regex eval (prefix-only / single contains via FastRegexMatcher
+//     fast paths) ≈ 200ns per call
+//   - complex regex eval (multi-substring containsInOrder, capture groups,
+//     character classes - falls through to RE2 or multi-Index) ≈ 1µs+ per call
+//
+// Cost model:
+//
+//	eager_cost  ≈ cardinality        * regex_per_call_cost
+//	lazy_cost   ≈ selective_postings * (LabelValueFor_cost + regex_per_call_cost)
+//
+// Lazy is a win when:
+//
+//	selective_postings * (LV + regex) < cardinality * regex
+//	⇒ cardinality / selective_postings > (LV + regex) / regex
+//
+// For simple regex (regex ≈ 200ns, LV ≈ 1µs): ratio > (1000+200)/200 = 6.
+// For complex regex (regex ≈ 1µs, LV ≈ 1µs): ratio > (1000+1000)/1000 = 2.
+type regexCost int
+
+const (
+	// regexCostUnknown is the zero value, returned by regexCostClass for
+	// non-regex matchers. Production code never reaches this path (callers
+	// type-check before invoking).
+	regexCostUnknown regexCost = iota
+	// regexCostSimple covers regexes that the FastRegexMatcher fast-paths via
+	// setMatches, prefix anchoring, suffix anchoring, or single-contains. Per-call
+	// cost is dominated by the underlying string op, not RE2 evaluation.
+	regexCostSimple
+	// regexCostComplex covers everything else: multi-substring contains
+	// (.*a.*b.*), alternation of contains, capture groups with siblings,
+	// character classes, etc. Per-call cost includes the full RE2
+	// fallback or multi-step containsInOrder.
+	regexCostComplex
+)
+
+// Calibrated default cost ratios: the flag defaults registered by
+// TSDBPostingsCacheConfig.RegisterFlagsWithPrefix, and the fallback that
+// splitMatchersForHeadWithConfig applies when a configured ratio is < 1.
+const (
+	defaultSimpleCostRatio  = 6
+	defaultComplexCostRatio = 2
+)
+
+// regexCostClass reports how expensive a single evaluation of the regex
+// matcher m is assumed to be. The result picks which cardinality:postings
+// ratio gates lazy deferral of m.
+//
+// Rules, applied in order:
+//
+//  1. m is neither MatchRegexp nor MatchNotRegexp: regexCostUnknown.
+//  2. m.Prefix() is non-empty: regexCostSimple only when the pattern is that
+//     literal prefix plus a trivial tail (see isPureLiteralPrefix); any other
+//     remainder is regexCostComplex. For example `foo.*` is simple, while
+//     `^foo[0-9]+$` shares the prefix "foo" yet still needs the full regex
+//     evaluated on values that carry the prefix.
+//  3. No prefix: `.*<literal>.*` (isSingleContainsRegex) and `.*<literal>`
+//     (isPureSuffixRegex) are regexCostSimple.
+//  4. Everything else is regexCostComplex.
+//
+// Only the matcher type, m.Prefix() and the pattern text are inspected, so a
+// negative regex is classified exactly like its positive counterpart.
+// Patterns with non-empty SetMatches() get no special case here: the caller
+// in this file keeps them on the postings path before classifying.
+//
+// The shape checks are textual, so an unrecognised spelling of a cheap
+// pattern is reported as regexCostComplex.
+func regexCostClass(m *labels.Matcher) regexCost {
+	isRegex := m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp
+	if !isRegex {
+		return regexCostUnknown
+	}
+	pattern := m.GetRegexString()
+
+	// A literal prefix alone is not enough: the pattern must be that prefix
+	// plus a trivial tail, otherwise matching values still need the full
+	// regex evaluated.
+	if prefix := m.Prefix(); prefix != "" {
+		if !isPureLiteralPrefix(pattern, prefix) {
+			return regexCostComplex
+		}
+		return regexCostSimple
+	}
+
+	// No literal prefix: only the single-contains (`.*foo.*`) and pure-suffix
+	// (`.*foo`) shapes are cheap; anything else is complex.
+	if isSingleContainsRegex(pattern) || isPureSuffixRegex(pattern) {
+		return regexCostSimple
+	}
+	return regexCostComplex
+}
+
+// isPureLiteralPrefix reports whether regex is the literal prefix, optionally
+// preceded by `^`, followed by nothing, `.*` or `.+`, with an optional
+// trailing `$`. Any other remainder (character class, alternation, group,
+// further literals) returns false, as does a regex that does not start with
+// prefix verbatim (e.g. because the literal is escaped in the pattern).
+func isPureLiteralPrefix(regex, prefix string) bool {
+	// Drop one leading ^, then require the literal prefix right after it.
+	tail, ok := strings.CutPrefix(strings.TrimPrefix(regex, "^"), prefix)
+	if !ok {
+		return false
+	}
+	// Drop one trailing $; what is left must be a trivially-matching tail.
+	switch strings.TrimSuffix(tail, "$") {
+	case "", ".*", ".+":
+		return true
+	default:
+		return false
+	}
+}
+
+// isSingleContainsRegex reports whether s has the shape `.*<literal>.*`, where
+// <literal> is non-empty and free of regex metacharacters (as judged by
+// containsRegexMeta).
+func isSingleContainsRegex(s string) bool {
+	const wildcard = ".*"
+	// More than 4 bytes: the wildcards cannot overlap and enclose a non-empty literal.
+	wrapped := len(s) > 2*len(wildcard) && strings.HasPrefix(s, wildcard) && strings.HasSuffix(s, wildcard)
+	return wrapped && !containsRegexMeta(s[len(wildcard) : len(s)-len(wildcard)])
+}
+
+// isPureSuffixRegex reports whether s is `.*<literal>` with a metacharacter-free
+// <literal>. Patterns that also end in `.*` are rejected here; they are the
+// single-contains shape (or the bare `.*`).
+func isPureSuffixRegex(s string) bool {
+	literal, hasWildcard := strings.CutPrefix(s, ".*")
+	if !hasWildcard || strings.HasSuffix(s, ".*") {
+		return false
+	}
+	return !containsRegexMeta(literal)
+}
+
+// containsRegexMeta reports whether s contains at least one of the bytes this
+// file treats as regex metacharacters:
+//
+//	. + * ? | ( ) [ ] { } \ ^ $
+//
+// Every listed character is ASCII, so bytes inside multi-byte UTF-8 sequences
+// never match.
+func containsRegexMeta(s string) bool {
+	return strings.ContainsAny(s, `.+*?|()[]{}\^$`)
+}
+
+// lazyMatcherConfig configures the cost-ratio gates used by
+// splitMatchersForHeadWithConfig. SimpleRatio/ComplexRatio values below 1
+// (including the zero value) are treated as "use the calibrated default"
+// (defaultSimpleCostRatio and defaultComplexCostRatio respectively), NOT as
+// "no margin" — guarding callers who construct the config struct
+// programmatically without going through flag registration.
+type lazyMatcherConfig struct {
+	// MaxCardinality is, despite its name, a lower bound: a label with at most
+	// this many values is never deferred. A value <= 0 disables the split.
+	MaxCardinality int
+	// SimpleRatio is the cardinality:postings ratio above which simple regex
+	// matchers are deferred. Tuned empirically; see regexCostClass docs.
+	// A value < 1 means "use defaultSimpleCostRatio".
+	SimpleRatio int
+	// ComplexRatio is the cardinality:postings ratio above which complex regex
+	// matchers are deferred. A value < 1 means "use defaultComplexCostRatio".
+	ComplexRatio int
+}
+
+// splitMatchersForHeadWithConfig partitions ms into matchers resolved through
+// postings (selectMatchers) and regex matchers to apply per series afterwards
+// (lazyMatchers). When nothing is deferred it returns (ms, nil) unchanged.
+//
+// A matcher is deferred only when ALL of:
+//   - ms has at least two matchers, one being a __name__ equality matcher
+//   - it is a regex or negative regex on a non-__name__ label with no SetMatches
+//   - the label's cardinality exceeds cfg.MaxCardinality
+//   - cardinality > minSelectPostings * ratio, where ratio is cfg.ComplexRatio
+//     for regexCostComplex matchers and cfg.SimpleRatio otherwise
+func splitMatchersForHeadWithConfig(ctx context.Context, ix prom_tsdb.IndexReader, ms []*labels.Matcher, cfg lazyMatcherConfig, cardinalityCache *expirable.LRU[string, int]) (selectMatchers, lazyMatchers []*labels.Matcher) {
+	if cfg.MaxCardinality <= 0 || len(ms) < 2 {
+		return ms, nil
+	}
+	// Ratios below 1 (including the zero value of a hand-built config) fall
+	// back to the calibrated defaults rather than acting as a ratio of 1,
+	// which would reduce the gate to a bare cardinality > postings
+	// comparison with no allowance for per-series lookup cost.
+	if cfg.SimpleRatio < 1 {
+		cfg.SimpleRatio = defaultSimpleCostRatio
+	}
+	if cfg.ComplexRatio < 1 {
+		cfg.ComplexRatio = defaultComplexCostRatio
+	}
+
+	hasMetricNameMatcher := false
+	for _, m := range ms {
+		if m.Name == labels.MetricName && m.Type == labels.MatchEqual {
+			hasMetricNameMatcher = true
+			break
+		}
+	}
+	if !hasMetricNameMatcher {
+		return ms, nil
+	}
+
+	// First pass: collect the regex matchers eligible for deferral and track the
+	// smallest non-empty postings list among the equality matchers (minSelectPostings).
+	type regexCandidate struct {
+		matcher     *labels.Matcher
+		cardinality int
+		cost        regexCost
+	}
+
+	var candidates []regexCandidate
+	selectMatchers = make([]*labels.Matcher, 0, len(ms))
+	minSelectPostings := 0
+
+	for _, m := range ms {
+		if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp {
+			// Never defer __name__ regex matchers.
+			if m.Name == labels.MetricName {
+				selectMatchers = append(selectMatchers, m)
+				continue
+			}
+
+			// Matchers with SetMatches (e.g. "foo|bar|baz") are resolved via
+			// direct posting lookups in postingsForMatcher — already fast.
+			// Never defer these.
+			if len(m.SetMatches()) > 0 {
+				selectMatchers = append(selectMatchers, m)
+				continue
+			}
+
+			// Check if the label has high cardinality.
+			cardinality := labelCardinality(ctx, ix, m.Name, cardinalityCache)
+			if cardinality <= cfg.MaxCardinality {
+				selectMatchers = append(selectMatchers, m)
+				continue
+			}
+
+			candidates = append(candidates, regexCandidate{
+				matcher:     m,
+				cardinality: cardinality,
+				cost:        regexCostClass(m),
+			})
+			continue
+		}
+
+		selectMatchers = append(selectMatchers, m)
+		if m.Type == labels.MatchEqual {
+			if n := postingsLen(ctx, ix, m.Name, m.Value); n > 0 {
+				if minSelectPostings == 0 || n < minSelectPostings {
+					minSelectPostings = n
+				}
+			}
+		}
+	}
+
+	if len(candidates) == 0 || minSelectPostings == 0 { // nothing to defer, or no equality postings size to compare against
+		return ms, nil
+	}
+
+	for _, c := range candidates {
+		ratio := cfg.SimpleRatio
+		if c.cost == regexCostComplex {
+			ratio = cfg.ComplexRatio
+		}
+		// Defer only when the label has more values than ratio times the smallest
+		// equality postings list, i.e. the eager scan is estimated to cost more.
+		if c.cardinality > minSelectPostings*ratio {
+			lazyMatchers = append(lazyMatchers, c.matcher)
+		} else {
+			selectMatchers = append(selectMatchers, c.matcher)
+		}
+	}
+
+	if len(lazyMatchers) == 0 { // every candidate stayed selective: hand back ms untouched
+		return ms, nil
+	}
+
+	return selectMatchers, lazyMatchers
+}
+
+// postingsLen reports how many series carry name=value. It returns 0 when the lookup
+// fails or the postings are not a *index.ListPostings, so 0 also means "unknown".
+func postingsLen(ctx context.Context, ix prom_tsdb.IndexReader, name, value string) int {
+	postings, err := ix.Postings(ctx, name, value)
+	if err != nil {
+		return 0
+	}
+	list, isList := postings.(*index.ListPostings)
+	if !isList {
+		return 0
+	}
+	return list.Len()
+}
+
+const (
+	labelCardinalityTTL       = 60 * time.Second // how long a cached cardinality is kept
+	labelCardinalityCacheSize = 10000            // max label names cached per tenant
+)
+
+// newLabelCardinalityCache builds the label name -> cardinality LRU: at most labelCardinalityCacheSize entries, each kept for labelCardinalityTTL, no eviction callback.
+func newLabelCardinalityCache() *expirable.LRU[string, int] {
+	return expirable.NewLRU[string, int](labelCardinalityCacheSize, nil, labelCardinalityTTL)
+}
+
+// labelCardinality returns how many values ix.LabelValues reports for name, memoised
+// in cache; a failed lookup yields 0 and is not cached.
+func labelCardinality(ctx context.Context, ix prom_tsdb.IndexReader, name string, cache *expirable.LRU[string, int]) int {
+	if n, ok := cache.Get(name); ok {
+		return n
+	}
+	n := 0
+	if vals, err := ix.LabelValues(ctx, name, (*storage.LabelHints)(nil)); err == nil {
+		n = len(vals)
+		cache.Add(name, n)
+	}
+	return n
+}
diff --git a/pkg/storage/tsdb/lazy_matchers_test.go b/pkg/storage/tsdb/lazy_matchers_test.go
new file mode 100644
index 0000000000..8ea641b4a2
--- /dev/null
+++ b/pkg/storage/tsdb/lazy_matchers_test.go
@@ -0,0 +1,499 @@
+package tsdb
+
+import (
+	"context"
+	"testing"
+
+	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/storage"
+	prom_tsdb "github.com/prometheus/prometheus/tsdb"
+	"github.com/prometheus/prometheus/tsdb/chunks"
+	"github.com/prometheus/prometheus/tsdb/index"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestSplitMatchersForHead table-tests which matchers are deferred, with both cost ratios pinned to 1.
+func TestSplitMatchersForHead(t *testing.T) {
+	ctx := context.Background()
+
+	ir := &mockIndexReader{
+		labelValues: map[string][]string{
+			"__name__":  {"cpu", "memory", "disk"},
+			"pod":       generateValues("pod-", 50000),
+			"namespace": {"prod", "staging", "dev"},
+			"service":   {"api", "worker", "gateway", "frontend", "backend"},
+			"job":       {"api", "worker", "gateway"},
+		},
+		postingsCounts: map[string]int{
+			"__name__\xffcpu":      1000,
+			"__name__\xffmemory":   800,
+			"service\xffapi":       200,
+			"service\xffworker":    300,
+			"namespace\xffprod":    500,
+			"namespace\xffstaging": 300,
+			"namespace\xffdev":     200,
+		},
+	}
+
+	tests := []struct {
+		name           string
+		matchers       []*labels.Matcher
+		maxCardinality int
+		wantSelect     int
+		wantLazy       int
+		wantLazyLabels []string
+	}{
+		{
+			name: "regex on high-cardinality label with selective equality matcher - deferred",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchEqual, "service", "api"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"),
+			},
+			maxCardinality: 10000,
+			wantSelect:     2, // __name__ + service
+			wantLazy:       1,
+			wantLazyLabels: []string{"pod"},
+		},
+		{
+			name: "regex on high-cardinality label with only __name__ equality - deferred (name is selective)",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"),
+			},
+			maxCardinality: 10000,
+			wantSelect:     1,
+			wantLazy:       1,
+			wantLazyLabels: []string{"pod"},
+		},
+		{
+			name: "regex on low-cardinality label - NOT deferred regardless",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchEqual, "service", "api"),
+				labels.MustNewMatcher(labels.MatchRegexp, "namespace", "prod|staging"),
+			},
+			maxCardinality: 10000,
+			wantSelect:     3, // namespace only has 3 values, below threshold
+			wantLazy:       0,
+		},
+		{
+			name: "no __name__ matcher - nothing deferred",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"),
+				labels.MustNewMatcher(labels.MatchEqual, "namespace", "prod"),
+			},
+			maxCardinality: 10000,
+			wantSelect:     2,
+			wantLazy:       0,
+		},
+		{
+			name: "disabled when maxCardinality is 0",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchEqual, "service", "api"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", ".*alan.*"),
+			},
+			maxCardinality: 0,
+			wantSelect:     3,
+			wantLazy:       0,
+		},
+		{
+			name: "negative regex on high-cardinality with selective matcher - deferred",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchEqual, "namespace", "prod"),
+				labels.MustNewMatcher(labels.MatchNotRegexp, "pod", ".*test.*"),
+			},
+			maxCardinality: 10000,
+			wantSelect:     2,
+			wantLazy:       1,
+			wantLazyLabels: []string{"pod"},
+		},
+		{
+			name: "__name__ regex is never deferred",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+				labels.MustNewMatcher(labels.MatchEqual, "service", "api"),
+				labels.MustNewMatcher(labels.MatchRegexp, "__name__", "cpu|memory"),
+			},
+			maxCardinality: 1,
+			wantSelect:     3,
+			wantLazy:       0,
+		},
+		{
+			name: "single matcher - nothing deferred",
+			matchers: []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+			},
+			maxCardinality: 1,
+			wantSelect:     1,
+			wantLazy:       0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			selectMs, lazyMs := splitMatchersForHeadWithConfig(ctx, ir, tt.matchers, lazyMatcherConfig{
+				MaxCardinality: tt.maxCardinality,
+				// Ratio 1 keeps the plain (cardinality > minSelectPostings) gate; calibrated
+				// ratios are covered by TestSplitMatchersForHead_CostRatio.
+				SimpleRatio:  1,
+				ComplexRatio: 1,
+			}, newLabelCardinalityCache())
+			assert.Len(t, selectMs, tt.wantSelect, "select matchers count")
+			if assert.Len(t, lazyMs, tt.wantLazy, "lazy matchers count") {
+				for i, name := range tt.wantLazyLabels {
+					assert.Equal(t, name, lazyMs[i].Name)
+				}
+			}
+		})
+	}
+}
+
+// TestFetchWithLazyMatchers covers the pass-through case: both refs returned for
+// the selective matchers carry a pod matching the lazy regex, so none is dropped.
+func TestFetchWithLazyMatchers(t *testing.T) {
+	ctx := context.Background()
+	// Mock index reader with five known series; series 1 and 2 have pod=web-*.
+	reader := &mockIndexReaderWithSeries{
+		mockIndexReader: mockIndexReader{
+			labelValues: map[string][]string{
+				"__name__": {"cpu"},
+				"pod":      {"web-1", "web-2", "worker-1", "worker-2", "api-1"},
+				"service":  {"frontend", "backend"},
+			},
+		},
+		series: map[storage.SeriesRef]labels.Labels{
+			1: labels.FromStrings("__name__", "cpu", "pod", "web-1", "service", "frontend"),
+			2: labels.FromStrings("__name__", "cpu", "pod", "web-2", "service", "frontend"),
+			3: labels.FromStrings("__name__", "cpu", "pod", "worker-1", "service", "backend"),
+			4: labels.FromStrings("__name__", "cpu", "pod", "worker-2", "service", "backend"),
+			5: labels.FromStrings("__name__", "cpu", "pod", "api-1", "service", "backend"),
+		},
+	}
+
+	pfmc := &blocksPostingsForMatchersCache{
+		postingsForMatchersFunc: func(_ context.Context, _ prom_tsdb.IndexReader, _ ...*labels.Matcher) (index.Postings, error) {
+			// Stands in for selectMs = [__name__="cpu", service="frontend"]: refs 1 and 2.
+			return index.NewListPostings([]storage.SeriesRef{1, 2}), nil
+		},
+	}
+	selectMs := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+		labels.MustNewMatcher(labels.MatchEqual, "service", "frontend"),
+	}
+	lazyMs := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchRegexp, "pod", "web.*"),
+	}
+
+	got, size, err := pfmc.fetchWithLazyMatchers(ctx, reader, selectMs, lazyMs)
+	assert.NoError(t, err)
+	assert.Equal(t, int64(len(got)*8), size)
+	// Nothing is filtered out, and the input order is kept.
+	assert.Equal(t, []storage.SeriesRef{1, 2}, got)
+}
+
+// TestFetchWithLazyMatchers_FiltersCorrectly checks that a ref whose pod value
+// fails the lazy regex is dropped while the order of the survivors is kept.
+func TestFetchWithLazyMatchers_FiltersCorrectly(t *testing.T) {
+	ctx := context.Background()
+	reader := &mockIndexReaderWithSeries{
+		mockIndexReader: mockIndexReader{
+			labelValues: map[string][]string{
+				"pod": {"web-1", "worker-1", "web-2"},
+			},
+		},
+		series: map[storage.SeriesRef]labels.Labels{
+			1: labels.FromStrings("pod", "web-1"),
+			2: labels.FromStrings("pod", "worker-1"),
+			3: labels.FromStrings("pod", "web-2"),
+		},
+	}
+
+	pfmc := &blocksPostingsForMatchersCache{
+		postingsForMatchersFunc: func(_ context.Context, _ prom_tsdb.IndexReader, _ ...*labels.Matcher) (index.Postings, error) {
+			return index.NewListPostings([]storage.SeriesRef{1, 2, 3}), nil
+		},
+	}
+	selectMs := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchEqual, "__name__", "cpu"),
+	}
+	lazyMs := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchRegexp, "pod", "web.*"),
+	}
+
+	got, _, err := pfmc.fetchWithLazyMatchers(ctx, reader, selectMs, lazyMs)
+	assert.NoError(t, err)
+	assert.Equal(t, []storage.SeriesRef{1, 3}, got)
+}
+
+// --- Mocks ---
+// mockIndexReader is an IndexReader test double: it embeds the interface and answers from two fixture maps.
+type mockIndexReader struct {
+	prom_tsdb.IndexReader
+	labelValues    map[string][]string
+	postingsCounts map[string]int // "name\xffvalue" -> count
+}
+
+func (m *mockIndexReader) LabelValues(_ context.Context, name string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) {
+	return m.labelValues[name], nil
+}
+// The methods below, except Postings, are inert: zero values, empty postings, pass-through, or ErrNotFound.
+func (m *mockIndexReader) Close() error              { return nil }
+func (m *mockIndexReader) Symbols() index.StringIter { return nil }
+func (m *mockIndexReader) LabelNames(_ context.Context, _ ...*labels.Matcher) ([]string, error) {
+	return nil, nil
+}
+func (m *mockIndexReader) SortedLabelValues(_ context.Context, _ string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) {
+	return nil, nil
+}
+func (m *mockIndexReader) Postings(_ context.Context, name string, values ...string) (index.Postings, error) {
+	if m.postingsCounts != nil && len(values) == 1 { // only single-value lookups consult the fixture
+		key := name + "\xff" + values[0]
+		if n, ok := m.postingsCounts[key]; ok {
+			refs := make([]storage.SeriesRef, n) // synthetic refs 1..n; only the count matters
+			for i := range refs {
+				refs[i] = storage.SeriesRef(i + 1)
+			}
+			return index.NewListPostings(refs), nil
+		}
+	}
+	return index.EmptyPostings(), nil // unknown pair, multi-value lookup, or no fixture
+}
+func (m *mockIndexReader) PostingsForLabelMatching(_ context.Context, _ string, _ func(string) bool) index.Postings {
+	return index.EmptyPostings()
+}
+func (m *mockIndexReader) PostingsForAllLabelValues(_ context.Context, _ string) index.Postings {
+	return index.EmptyPostings()
+}
+func (m *mockIndexReader) SortedPostings(p index.Postings) index.Postings               { return p }
+func (m *mockIndexReader) ShardedPostings(p index.Postings, _, _ uint64) index.Postings { return p }
+func (m *mockIndexReader) Series(_ storage.SeriesRef, _ *labels.ScratchBuilder, _ *[]chunks.Meta) error {
+	return nil
+}
+func (m *mockIndexReader) LabelValueFor(_ context.Context, _ storage.SeriesRef, _ string) (string, error) {
+	return "", storage.ErrNotFound
+}
+func (m *mockIndexReader) LabelNamesFor(_ context.Context, _ index.Postings) ([]string, error) {
+	return nil, nil
+}
+
+// mockIndexReaderWithSeries extends mockIndexReader with per-series labels.
+type mockIndexReaderWithSeries struct {
+	mockIndexReader
+	series map[storage.SeriesRef]labels.Labels
+}
+
+// LabelValueFor returns the value of label on series id. An unknown id, or a
+// series whose label is absent or empty, yields storage.ErrNotFound, the same
+// error the embedded mockIndexReader returns unconditionally.
+func (m *mockIndexReaderWithSeries) LabelValueFor(_ context.Context, id storage.SeriesRef, label string) (string, error) {
+	if lbls, known := m.series[id]; known {
+		if v := lbls.Get(label); v != "" {
+			return v, nil
+		}
+	}
+	return "", storage.ErrNotFound
+}
+
+func generateValues(prefix string, count int) []string { // returns count strings of the form prefix + two digits
+	vals := make([]string, count)
+	for i := range vals {
+		vals[i] = prefix + string(rune('0'+i%10)) + string(rune('0'+i/10%10)) // ones digit, then tens digit
+	}
+	return vals // NOTE(review): only 100 distinct strings, repeating every 100 entries; the mock's consumer shown here reads len() only
+}
+
+// TestRegexCostClass pins the output of regexCostClass, the classifier that
+// selects the cardinality:postings ratio gate, for representative patterns:
+// literal prefix with a trivial tail and single contains are expected to be
+// "simple" (cheap per call); multi-substring contains, capture groups,
+// alternation of contains and a prefix followed by a character class are
+// expected to be "complex". Both MatchRegexp and MatchNotRegexp are covered;
+// the pure-suffix shape and non-regex matchers are not exercised here.
+func TestRegexCostClass(t *testing.T) {
+	cases := []struct {
+		name      string
+		matcher   *labels.Matcher
+		wantClass regexCost
+	}{
+		{
+			"prefix only - cheap containsInOrder fast-reject",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", "foo.*"),
+			regexCostSimple,
+		},
+		{
+			"single contains - moderate",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", ".*foo.*"),
+			regexCostSimple,
+		},
+		{
+			"multi-substring contains - complex",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", ".*foo.*bar.*"),
+			regexCostComplex,
+		},
+		{
+			"capture group + literal - complex",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", "(.+)-(.+)-(.+)"),
+			regexCostComplex,
+		},
+		{
+			"alternation of contains - complex",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", ".*a.*|.*b.*|.*c.*"),
+			regexCostComplex,
+		},
+		{
+			"plain anchored regex with character class - complex",
+			labels.MustNewMatcher(labels.MatchRegexp, "x", "^foo[0-9]+$"),
+			regexCostComplex,
+		},
+		{
+			"NotRegexp single contains - moderate",
+			labels.MustNewMatcher(labels.MatchNotRegexp, "x", ".*foo.*"),
+			regexCostSimple,
+		},
+		{
+			"NotRegexp multi-substring - complex",
+			labels.MustNewMatcher(labels.MatchNotRegexp, "x", ".*a.*b.*"),
+			regexCostComplex,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := regexCostClass(tc.matcher)
+			assert.Equal(t, tc.wantClass, got, "%s: got %v want %v", tc.matcher.String(), got, tc.wantClass)
+		})
+	}
+}
+
+// TestSplitMatchersForHead_ZeroRatioUsesDefaults checks that a lazyMatcherConfig
+// built programmatically, without going through flag registration, falls back to
+// the calibrated defaults (6 and 2) and is NOT clamped to 1 (which would
+// silently re-introduce the original broken gate). See the regexCost docstring
+// for the cost model.
+func TestSplitMatchersForHead_ZeroRatioUsesDefaults(t *testing.T) {
+	selectors := []*labels.Matcher{
+		labels.MustNewMatcher(labels.MatchEqual, "__name__", "metric_a"),
+		// simple regex: prefix-only.
+		labels.MustNewMatcher(labels.MatchRegexp, "pod", "foo.*"),
+	}
+	reader := &mockIndexReader{
+		labelValues: map[string][]string{
+			"__name__": {"metric_a"},
+			"pod":      generateValues("pod-", 30000),
+		},
+		postingsCounts: map[string]int{
+			"__name__\xffmetric_a": 10000,
+		},
+	}
+	// The cardinality:postings ratio here is 30000/10000 = 3.
+	// A ratio clamped to 1 would defer, because 3 > 1.
+	// The default ratio of 6 must NOT defer, because 3 > 6 is false.
+	zeroRatios := lazyMatcherConfig{
+		MaxCardinality: 1000,
+		// SimpleRatio and ComplexRatio stay at zero, so the defaults must apply.
+	}
+	cache := newLabelCardinalityCache()
+	_, deferred := splitMatchersForHeadWithConfig(context.Background(), reader, selectors, zeroRatios, cache)
+	assert.Len(t, deferred, 0, "zero SimpleRatio must default to %d, not be clamped to 1", defaultSimpleCostRatio)
+}
+
+// TestSplitMatchersForHead_CostRatio covers the cost-ratio gate. The original
+// gate, `cardinality > minSelectPostings`, wrongly deferred regex evaluation
+// even when LabelValueFor (per-series) would cost more than
+// PostingsForLabelMatching (per-value). The fixed gate is
+// `cardinality > minSelectPostings * ratio`, with ratio set by regex cost class:
+//   - simple regex (prefix-only / single contains): ratio=6 (LabelValueFor is
+//     ~5x more expensive than a fast-path regex evaluation, +1 margin)
+//   - complex regex (multi-substring, capture, char class): ratio=2 (per-call
+//     regex cost is high enough that lazy wins at lower ratio)
+func TestSplitMatchersForHead_CostRatio(t *testing.T) {
+	const (
+		ratioSimple  = 6
+		ratioComplex = 2
+	)
+	// newReader builds an index in which __name__=metric_a has 10000 series
+	// and pod carries podCard values, so each case can probe the gate.
+	newReader := func(podCard int) *mockIndexReader {
+		return &mockIndexReader{
+			labelValues: map[string][]string{
+				"__name__": {"metric_a"},
+				"pod":      generateValues("pod-", podCard),
+			},
+			postingsCounts: map[string]int{
+				"__name__\xffmetric_a": 10000,
+			},
+		}
+	}
+
+	tests := []struct {
+		name          string
+		podCard       int
+		regex         string
+		simpleRatio   int
+		complexRatio  int
+		wantLazyCount int
+	}{
+		{
+			// 20K cardinality over 10K postings gives ratio = 2. For a SIMPLE
+			// regex that sits below the 6x threshold, so it must NOT defer
+			// (the `balanced_select` failure mode the original code triggered).
+			name:          "simple regex 2x ratio - NOT deferred (cost gate)",
+			podCard:       20000,
+			regex:         "foo.*",
+			simpleRatio:   ratioSimple,
+			complexRatio:  ratioComplex,
+			wantLazyCount: 0,
+		},
+		{
+			// 100K cardinality over 10K postings gives ratio = 10, above 6x: defer.
+			name:          "simple regex 10x ratio - deferred",
+			podCard:       100000,
+			regex:         "foo.*",
+			simpleRatio:   ratioSimple,
+			complexRatio:  ratioComplex,
+			wantLazyCount: 1,
+		},
+		{
+			// 25K cardinality over 10K postings gives ratio = 2.5, above the
+			// complex threshold of 2: defer.
+			name:          "complex regex 2.5x ratio - deferred",
+			podCard:       25000,
+			regex:         ".*foo.*bar.*",
+			simpleRatio:   ratioSimple,
+			complexRatio:  ratioComplex,
+			wantLazyCount: 1,
+		},
+		{
+			// 15K cardinality over 10K postings gives ratio = 1.5, below the
+			// complex threshold of 2: do NOT defer.
+			name:          "complex regex 1.5x ratio - NOT deferred",
+			podCard:       15000,
+			regex:         ".*foo.*bar.*",
+			simpleRatio:   ratioSimple,
+			complexRatio:  ratioComplex,
+			wantLazyCount: 0,
+		},
+	}
+
+	ctx := context.Background()
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := newReader(tt.podCard)
+			selectors := []*labels.Matcher{
+				labels.MustNewMatcher(labels.MatchEqual, "__name__", "metric_a"),
+				labels.MustNewMatcher(labels.MatchRegexp, "pod", tt.regex),
+			}
+			ratios := lazyMatcherConfig{
+				MaxCardinality: 1000, // engage threshold (< all podCard above)
+				SimpleRatio:    tt.simpleRatio,
+				ComplexRatio:   tt.complexRatio,
+			}
+			_, deferred := splitMatchersForHeadWithConfig(ctx, reader, selectors, ratios, newLabelCardinalityCache())
+			assert.Len(t, deferred, tt.wantLazyCount,
+				"podCard=%d regex=%q: lazyCount mismatch", tt.podCard, tt.regex)
+		})
+	}
+}
diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json
index e93e7c36a4..fb1bee6593 100644
--- a/schemas/cortex-config-schema.json
+++ b/schemas/cortex-config-schema.json
@@ -3045,6 +3045,24 @@
                     }
                   },
                   "type": "object"
+                },
+                "lazy_matcher_complex_cost_ratio": {
+                  "default": 2,
+                  "description": "Cardinality:postings ratio above which a complex regex (multi-substring, capture groups, character classes) is deferred. Lower = more aggressive deferral. Calibrated empirically; defaults to 2.",
+                  "type": "number",
+                  "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-complex-cost-ratio"
+                },
+                "lazy_matcher_max_cardinality": {
+                  "default": 0,
+                  "description": "Maximum label cardinality for deferring regex matchers on the head block. When a regex matcher targets a label with more unique values than this threshold, it is applied lazily during iteration instead of postings lookup. 0 disables.",
+                  "type": "number",
+                  "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality"
+                },
+                "lazy_matcher_simple_cost_ratio": {
+                  "default": 6,
+                  "description": "Cardinality:postings ratio above which a simple regex (prefix-only, single contains) is deferred to lazy iteration. Lower = more aggressive deferral. Calibrated empirically; defaults to 6.",
+                  "type": "number",
+                  "x-cli-flag": "blocks-storage.expanded_postings_cache.head.lazy-matcher-simple-cost-ratio"
                 }
               },
               "type": "object"