@@ -1218,6 +1218,39 @@ double Optimizer::getDependentSelectivity()
12181218}
12191219
12201220
1221+ //
1222+ // Estimate overall selectivity for a list of conjuncts.
1223+ // Booleans are usually inter-dependent in practice and simple multiplication results in a very low selectivity value,
1224+ // thus causing the stream cardinality to be under-estimated. To avoid this, apply an exponential backoff adjustment.
1225+ // See also explanation in the middle of Retrieval::makeInversion().
1226+ //
1227+
1228+ double Optimizer::estimateSelectivity (const BooleanList& filters, double cardinality, unsigned priorConjuncts)
1229+ {
1230+ // Gather the selectivity of every filter into a sorted array, so the factors are processed in order below
1231+ SortedArray<double , InlineStorage<double , OPT_STATIC_ITEMS> > selectivities;
1232+
1233+ for (const auto filter : filters)
1234+ selectivities.add (getSelectivity (filter));
1235+
1236+ auto selectivity = MAXIMUM_SELECTIVITY; // running product, returned unchanged when there are no filters
1237+
1238+ if (selectivities.hasData () && !priorConjuncts && cardinality) // only when no conjuncts were counted before and cardinality is non-zero
1239+ {
1240+ // If the table is small enough, the hardcoded selectivity factors produce a resulting
1241+ // selectivity that is too small. Adjust the initial value to protect against this case.
1242+ const auto minSelectivity = MAXIMUM_SELECTIVITY / cardinality; // presumably the selectivity of a single row - TODO confirm
1243+ selectivity *= minSelectivity / selectivities.front (); // NOTE(review): divides by the first (presumably smallest) factor and is applied for any non-zero cardinality - confirm it can never be 0 and that large tables are intended too
1244+ }
1245+
1246+ // Apply exponential backoff: each factor is adjusted by its conjunct position, which advances by one per factor
1247+ for (auto factor : selectivities)
1248+ selectivity *= applyBackoff (factor, priorConjuncts++);
1249+
1250+ return selectivity;
1251+ }
1252+
1253+
12211254//
12221255// Prepare relation and its indices for optimization
12231256//
@@ -2970,6 +3003,7 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
29703003 BoolExprNode* condition = nullptr ;
29713004 Array<DbKeyRangeNode*> dbkeyRanges;
29723005 double scanSelectivity = MAXIMUM_SELECTIVITY;
3006+ double filterSelectivity = MAXIMUM_SELECTIVITY;
29733007
29743008 if (relation ()->getExtFile ())
29753009 {
@@ -3020,14 +3054,14 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30203054 // Persistent table
30213055 Retrieval retrieval (tdbb, this , stream, outerFlag, innerFlag,
30223056 (sortClause ? *sortClause : nullptr ), false );
3023- const auto candidate = retrieval.getInversion ();
30243057
3025- if (candidate)
3058+ if (const auto candidate = retrieval. getInversion () )
30263059 {
30273060 inversion = candidate->inversion ;
30283061 condition = candidate->condition ;
30293062 dbkeyRanges.assign (candidate->dbkeyRanges );
30303063 scanSelectivity = candidate->matchSelectivity ;
3064+ filterSelectivity = candidate->filterSelectivity ;
30313065
30323066 // Just for safety sake, this condition must be already checked
30333067 // inside OptimizerRetrieval::matchOnIndexes()
@@ -3066,8 +3100,8 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30663100 // booleans. When one is found, roll it into a final boolean and mark
30673101 // it used. If a computable boolean didn't match against an index then
30683102 // mark the stream to denote unmatched booleans.
3103+ BooleanList filters;
30693104 BoolExprNode* boolean = nullptr ;
3070- double filterSelectivity = MAXIMUM_SELECTIVITY;
30713105
30723106 for (auto iter = getConjuncts (outerFlag, innerFlag); iter.hasData (); ++iter)
30733107 {
@@ -3093,12 +3127,16 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
30933127 }
30943128
30953129 if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3096- filterSelectivity *= getSelectivity (*iter);
3130+ filters. add (*iter);
30973131 }
30983132 }
30993133 }
31003134
3101- if (!rsb)
3135+ if (rsb)
3136+ {
3137+ filterSelectivity = Optimizer::estimateSelectivity (filters, rsb->getCardinality ());
3138+ }
3139+ else
31023140 {
31033141 if (inversion && condition)
31043142 {
@@ -3134,9 +3172,13 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream,
31343172
31353173RecordSource* Optimizer::applyBoolean (RecordSource* rsb, ConjunctIterator& iter)
31363174{
3137- double selectivity = MAXIMUM_SELECTIVITY;
3138- if (const auto boolean = composeBoolean (iter, &selectivity))
3175+ BooleanList filters; // filled by composeBoolean() with the conjuncts to be used for selectivity estimation
3176+
3177+ if (const auto boolean = composeBoolean (iter, filters))
3178+ {
3179+ const auto selectivity = estimateSelectivity (filters, rsb->getCardinality ()); // combined estimate for the collected filters, given the input stream's cardinality
31393180 rsb = FB_NEW_POOL (getPool ()) FilteredStream (csb, rsb, boolean, selectivity); // wrap the input stream with the composed boolean
3181+ }
31403182
31413183 return rsb; // the original stream when no boolean was composed
31423184}
@@ -3167,8 +3209,8 @@ RecordSource* Optimizer::applyLocalBoolean(RecordSource* rsb,
31673209
31683210RecordSource* Optimizer::applyResidualBoolean (RecordSource* rsb)
31693211{
3212+ BooleanList filters;
31703213 BoolExprNode* boolean = nullptr ;
3171- double selectivity = MAXIMUM_SELECTIVITY;
31723214
31733215 for (auto iter = getBaseConjuncts (); iter.hasData (); ++iter)
31743216 {
@@ -3178,15 +3220,17 @@ RecordSource* Optimizer::applyResidualBoolean(RecordSource* rsb)
31783220 iter |= CONJUNCT_USED;
31793221
31803222 if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3181- selectivity *= getSelectivity (*iter);
3223+ filters. add (*iter);
31823224 }
31833225 }
31843226
3227+ const auto selectivity = estimateSelectivity (filters, rsb->getCardinality ());
3228+
31853229 return boolean ? FB_NEW_POOL (getPool ()) FilteredStream (csb, rsb, boolean, selectivity) : rsb;
31863230}
31873231
31883232
3189- BoolExprNode* Optimizer::composeBoolean (ConjunctIterator& iter, double * selectivity )
3233+ BoolExprNode* Optimizer::composeBoolean (ConjunctIterator& iter, BooleanList& filters )
31903234{
31913235 BoolExprNode* boolean = nullptr ;
31923236
@@ -3199,8 +3243,8 @@ BoolExprNode* Optimizer::composeBoolean(ConjunctIterator& iter, double* selectiv
31993243 compose (getPool (), &boolean, iter);
32003244 iter |= CONJUNCT_USED;
32013245
3202- if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)) && selectivity )
3203- *selectivity *= getSelectivity (*iter);
3246+ if (!(iter & (CONJUNCT_MATCHED | CONJUNCT_JOINED)))
3247+ filters. add (*iter);
32043248 }
32053249 }
32063250
0 commit comments