Skip to content

Commit 4d482a2

Browse files
authored
Refactor selectivity estimations. Use exponential backoff to take possible index-dependencies into account. (#8940)
1 parent 0c91d1a commit 4d482a2

4 files changed

Lines changed: 156 additions & 108 deletions

File tree

src/jrd/optimizer/InnerJoin.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ void InnerJoin::calculateStreamInfo()
108108
innerStream->baseIndexes = candidate->indexes;
109109
innerStream->baseUnique = candidate->unique;
110110
innerStream->baseNavigated = candidate->navigated;
111-
innerStream->baseConjuncts = candidate->conjuncts;
112111

113112
csb->csb_rpt[innerStream->number].deactivate();
114113
}

src/jrd/optimizer/Optimizer.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,39 @@ double Optimizer::getDependentSelectivity()
12181218
}
12191219

12201220

1221+
//
1222+
// Estimate overall selectivity for a list of conjuncts.
1223+
// Booleans are usually inter-dependent in practice and simple multiplication results in a very low selectivity value,
1224+
// thus causing the stream cardinality to be under-estimated. To avoid this, apply exponential backoff adjustment.
1225+
// See also explanation in the middle of Retrieval::makeInversion().
1226+
//
1227+
1228+
double Optimizer::estimateSelectivity(const BooleanList& filters, double cardinality, unsigned priorConjuncts)
1229+
{
1230+
// Get selectivities and order them
1231+
SortedArray<double, InlineStorage<double, OPT_STATIC_ITEMS> > selectivities;
1232+
1233+
for (const auto filter : filters)
1234+
selectivities.add(getSelectivity(filter));
1235+
1236+
auto selectivity = MAXIMUM_SELECTIVITY;
1237+
1238+
if (selectivities.hasData() && !priorConjuncts && cardinality)
1239+
{
1240+
// If the table is small enough, the hardcoded selectivity factors are causing
1241+
// too small resulting selectivity. Adjust the initial value to protect from this case.
1242+
const auto minSelectivity = MAXIMUM_SELECTIVITY / cardinality;
1243+
selectivity *= minSelectivity / selectivities.front();
1244+
}
1245+
1246+
// Apply exponential backoff
1247+
for (auto factor : selectivities)
1248+
selectivity *= applyBackoff(factor, priorConjuncts++);
1249+
1250+
return selectivity;
1251+
}
1252+
1253+
12211254
//
12221255
// Prepare relation and its indices for optimization
12231256
//
@@ -2970,6 +3003,7 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
29703003
BoolExprNode* condition = nullptr;
29713004
Array<DbKeyRangeNode*> dbkeyRanges;
29723005
double scanSelectivity = MAXIMUM_SELECTIVITY;
3006+
double filterSelectivity = MAXIMUM_SELECTIVITY;
29733007

29743008
if (relation()->getExtFile())
29753009
{
@@ -3020,14 +3054,14 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30203054
// Persistent table
30213055
Retrieval retrieval(tdbb, this, stream, outerFlag, innerFlag,
30223056
(sortClause ? *sortClause : nullptr), false);
3023-
const auto candidate = retrieval.getInversion();
30243057

3025-
if (candidate)
3058+
if (const auto candidate = retrieval.getInversion())
30263059
{
30273060
inversion = candidate->inversion;
30283061
condition = candidate->condition;
30293062
dbkeyRanges.assign(candidate->dbkeyRanges);
30303063
scanSelectivity = candidate->matchSelectivity;
3064+
filterSelectivity = candidate->filterSelectivity;
30313065

30323066
// Just for safety sake, this condition must be already checked
30333067
// inside OptimizerRetrieval::matchOnIndexes()
@@ -3066,8 +3100,8 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30663100
// booleans. When one is found, roll it into a final boolean and mark
30673101
// it used. If a computable boolean didn't match against an index then
30683102
// mark the stream to denote unmatched booleans.
3103+
BooleanList filters;
30693104
BoolExprNode* boolean = nullptr;
3070-
double filterSelectivity = MAXIMUM_SELECTIVITY;
30713105

30723106
for (auto iter = getConjuncts(outerFlag, innerFlag); iter.hasData(); ++iter)
30733107
{
@@ -3093,12 +3127,16 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30933127
}
30943128

30953129
if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3096-
filterSelectivity *= getSelectivity(*iter);
3130+
filters.add(*iter);
30973131
}
30983132
}
30993133
}
31003134

3101-
if (!rsb)
3135+
if (rsb)
3136+
{
3137+
filterSelectivity = Optimizer::estimateSelectivity(filters, rsb->getCardinality());
3138+
}
3139+
else
31023140
{
31033141
if (inversion && condition)
31043142
{
@@ -3134,9 +3172,13 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
31343172

31353173
RecordSource* Optimizer::applyBoolean(RecordSource* rsb, ConjunctIterator& iter)
31363174
{
3137-
double selectivity = MAXIMUM_SELECTIVITY;
3138-
if (const auto boolean = composeBoolean(iter, &selectivity))
3175+
BooleanList filters;
3176+
3177+
if (const auto boolean = composeBoolean(iter, filters))
3178+
{
3179+
const auto selectivity = estimateSelectivity(filters, rsb->getCardinality());
31393180
rsb = FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, boolean, selectivity);
3181+
}
31403182

31413183
return rsb;
31423184
}
@@ -3167,8 +3209,8 @@ RecordSource* Optimizer::applyLocalBoolean(RecordSource* rsb,
31673209

31683210
RecordSource* Optimizer::applyResidualBoolean(RecordSource* rsb)
31693211
{
3212+
BooleanList filters;
31703213
BoolExprNode* boolean = nullptr;
3171-
double selectivity = MAXIMUM_SELECTIVITY;
31723214

31733215
for (auto iter = getBaseConjuncts(); iter.hasData(); ++iter)
31743216
{
@@ -3178,15 +3220,17 @@ RecordSource* Optimizer::applyResidualBoolean(RecordSource* rsb)
31783220
iter |= CONJUNCT_USED;
31793221

31803222
if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3181-
selectivity *= getSelectivity(*iter);
3223+
filters.add(*iter);
31823224
}
31833225
}
31843226

3227+
const auto selectivity = estimateSelectivity(filters, rsb->getCardinality());
3228+
31853229
return boolean ? FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, boolean, selectivity) : rsb;
31863230
}
31873231

31883232

3189-
BoolExprNode* Optimizer::composeBoolean(ConjunctIterator& iter, double* selectivity)
3233+
BoolExprNode* Optimizer::composeBoolean(ConjunctIterator& iter, BooleanList& filters)
31903234
{
31913235
BoolExprNode* boolean = nullptr;
31923236

@@ -3199,8 +3243,8 @@ BoolExprNode* Optimizer::composeBoolean(ConjunctIterator& iter, double* selectiv
31993243
compose(getPool(), &boolean, iter);
32003244
iter |= CONJUNCT_USED;
32013245

3202-
if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)) && selectivity)
3203-
*selectivity *= getSelectivity(*iter);
3246+
if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3247+
filters.add(*iter);
32043248
}
32053249
}
32063250

src/jrd/optimizer/Optimizer.h

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include "../jrd/Statement.h"
4444
#include "../jrd/recsrc/RecordSource.h"
4545

46+
#include <cmath>
47+
4648
namespace Jrd {
4749

4850
// AB: 2005-11-05
@@ -84,6 +86,9 @@ class SortNode;
8486
class River;
8587
class SortedStream;
8688

89+
// List of booleans
90+
typedef Firebird::HalfStaticArray<BoolExprNode*, OPT_STATIC_ITEMS> BooleanList;
91+
8792

8893
//
8994
// River
@@ -275,7 +280,7 @@ class Optimizer final : public Firebird::PermanentStorage
275280
{
276281
// Conjunctions and their options
277282
BoolExprNode* node;
278-
unsigned flags;
283+
unsigned flags = 0;
279284
};
280285

281286
static constexpr unsigned CONJUNCT_USED = 1; // conjunct is used
@@ -445,23 +450,13 @@ class Optimizer final : public Firebird::PermanentStorage
445450
}
446451
}
447452

448-
// dimitr:
449-
//
450-
// Adjust to values similar to those used when the index selectivity is missing.
451-
// The final value will be in the range [0.1 .. 0.5] that also matches the v3/v4 logic.
452-
// This estimation is quite pessimistic but it seems to work better in practice,
453-
// especially when multiple unmatchable booleans are used.
454-
455-
constexpr auto adjustment = DEFAULT_SELECTIVITY / REDUCE_SELECTIVITY_FACTOR_EQUALITY;
456-
const auto selectivity = factor * adjustment;
453+
if (!factor)
454+
factor = DEFAULT_SELECTIVITY;
457455

458-
return MIN(selectivity, MAXIMUM_SELECTIVITY / 2);
456+
return MIN(factor, MAXIMUM_SELECTIVITY);
459457
}
460458

461-
static void adjustSelectivity(double& selectivity, double factor) noexcept
462-
{
463-
selectivity = MIN(selectivity * factor, MAXIMUM_SELECTIVITY);
464-
}
459+
static double estimateSelectivity(const BooleanList& filters, double cardinality = 0, unsigned priorConjuncts = 0);
465460

466461
double getDependentSelectivity();
467462

@@ -485,6 +480,14 @@ class Optimizer final : public Firebird::PermanentStorage
485480
return false;
486481
}
487482

483+
// Weaken a selectivity factor according to its position in the conjunct list:
// the square root is taken once per every prior conjunct, so the factor is
// returned unchanged when priorConjuncts is zero.
static double applyBackoff(double selectivity, unsigned priorConjuncts)
{
	auto result = selectivity;

	for (auto remaining = priorConjuncts; remaining > 0; --remaining)
		result = std::sqrt(result);

	return result;
}
490+
488491
static RecordSource* compile(thread_db* tdbb, CompilerScratch* csb, RseNode* rse)
489492
{
490493
bool firstRows = false;
@@ -560,13 +563,13 @@ class Optimizer final : public Firebird::PermanentStorage
560563
ConjunctIterator& iter);
561564
RecordSource* applyResidualBoolean(RecordSource* rsb);
562565

563-
BoolExprNode* composeBoolean(ConjunctIterator& iter,
564-
double* selectivity = nullptr);
566+
BoolExprNode* composeBoolean(ConjunctIterator& iter, BooleanList& filters);
565567

566-
BoolExprNode* composeBoolean(double* selectivity = nullptr)
568+
BoolExprNode* composeBoolean()
567569
{
570+
BooleanList filters;
568571
auto iter = getBaseConjuncts();
569-
return composeBoolean(iter, selectivity);
572+
return composeBoolean(iter, filters);
570573
}
571574

572575
bool checkEquiJoin(BoolExprNode* boolean);
@@ -653,8 +656,6 @@ enum segmentScanType {
653656
segmentScanList
654657
};
655658

656-
typedef Firebird::HalfStaticArray<BoolExprNode*, OPT_STATIC_ITEMS> BooleanList;
657-
658659
struct IndexScratchSegment
659660
{
660661
explicit IndexScratchSegment(MemoryPool& p)
@@ -714,11 +715,12 @@ typedef Firebird::ObjectsArray<IndexScratch> IndexScratchList;
714715
struct InversionCandidate
715716
{
716717
explicit InversionCandidate(MemoryPool& p)
717-
: conjuncts(p), matches(p), dbkeyRanges(p), dependentFromStreams(p)
718+
: matches(p), filters(p), dbkeyRanges(p), dependentFromStreams(p)
718719
{}
719720

720721
double selectivity = MAXIMUM_SELECTIVITY;
721722
double matchSelectivity = MAXIMUM_SELECTIVITY;
723+
double filterSelectivity = MAXIMUM_SELECTIVITY;
722724
double cost = 0;
723725
unsigned nonFullMatchedSegments = MAX_INDEX_SEGMENTS + 1;
724726
unsigned matchedSegments = 0;
@@ -732,10 +734,19 @@ struct InversionCandidate
732734
bool unique = false;
733735
bool navigated = false;
734736

735-
BooleanList conjuncts; // booleans referring our stream
736737
BooleanList matches; // booleans matched to any index
738+
BooleanList filters; // unmatched booleans referring our stream
737739
Firebird::Array<DbKeyRangeNode*> dbkeyRanges;
738740
SortedStreamList dependentFromStreams;
741+
742+
void applyFilters(double cardinality)
743+
{
744+
fb_assert(selectivity == matchSelectivity);
745+
fb_assert(filterSelectivity == MAXIMUM_SELECTIVITY);
746+
const auto matchCount = (unsigned) matches.getCount();
747+
filterSelectivity = Optimizer::estimateSelectivity(filters, cardinality, matchCount);
748+
selectivity *= filterSelectivity;
749+
}
739750
};
740751

741752
typedef Firebird::HalfStaticArray<InversionCandidate*, OPT_STATIC_ITEMS> InversionCandidateList;
@@ -865,7 +876,7 @@ class InnerJoin final : private Firebird::PermanentStorage
865876
{
866877
public:
867878
StreamInfo(MemoryPool& p, StreamType num)
868-
: number(num), baseConjuncts(p), indexedRelationships(p)
879+
: number(num), indexedRelationships(p)
869880
{}
870881

871882
bool isIndependent() const noexcept
@@ -912,7 +923,6 @@ class InnerJoin final : private Firebird::PermanentStorage
912923
bool used = false;
913924
unsigned previousExpectedStreams = 0;
914925

915-
BooleanList baseConjuncts;
916926
IndexedRelationships indexedRelationships;
917927
};
918928

0 commit comments

Comments
 (0)