Skip to content

Commit faf6752

Browse files
pcaspersjenkins
authored andcommitted
QPR-12119 implement regression model
1 parent 2337d0e commit faf6752

2 files changed

Lines changed: 165 additions & 36 deletions

File tree

QuantExt/qle/pricingengines/mcmultilegbaseengine.cpp

Lines changed: 156 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -827,38 +827,45 @@ void McMultiLegBaseEngine::calculate() const {
827827
}
828828

829829
if (exercise_ != nullptr) {
830-
regModelUndExInto[counter] =
831-
RegressionModel(*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; });
830+
regModelUndExInto[counter] = RegressionModel(
831+
*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; },
832+
model_->stateProcess()->size());
832833
regModelUndExInto[counter].train(polynomOrder_, polynomType_, pathValueUndExInto, pathValuesRef,
833834
simulationTimes);
834835
}
835836

836837
if (isExerciseTime) {
837-
auto exerciseValue = regModelUndExInto[counter].apply(pathValuesRef, simulationTimes);
838-
regModelContinuationValue[counter] =
839-
RegressionModel(*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; });
838+
auto exerciseValue = regModelUndExInto[counter].apply(model_->stateProcess()->initialValues(),
839+
pathValuesRef, simulationTimes);
840+
regModelContinuationValue[counter] = RegressionModel(
841+
*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; },
842+
model_->stateProcess()->size());
840843
regModelContinuationValue[counter].train(polynomOrder_, polynomType_, pathValueOption, pathValuesRef,
841844
simulationTimes,
842845
exerciseValue > RandomVariable(calibrationSamples_, 0));
843-
auto continuationValue = regModelContinuationValue[counter].apply(pathValuesRef, simulationTimes);
846+
auto continuationValue = regModelContinuationValue[counter].apply(model_->stateProcess()->initialValues(),
847+
pathValuesRef, simulationTimes);
844848
pathValueOption = conditionalResult(exerciseValue > continuationValue &&
845849
exerciseValue > RandomVariable(calibrationSamples_, 0),
846850
pathValueUndExInto, pathValueOption);
847-
regModelOption[counter] =
848-
RegressionModel(*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; });
851+
regModelOption[counter] = RegressionModel(
852+
*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; },
853+
model_->stateProcess()->size());
849854
regModelOption[counter].train(polynomOrder_, polynomType_, pathValueOption, pathValuesRef, simulationTimes);
850855
}
851856

852857
if (isXvaTime) {
853-
regModelUndDirty[counter] =
854-
RegressionModel(*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] != CfStatus::open; });
858+
regModelUndDirty[counter] = RegressionModel(
859+
*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] != CfStatus::open; },
860+
model_->stateProcess()->size());
855861
regModelUndDirty[counter].train(polynomOrder_, polynomType_, pathValueUndDirty, pathValuesRef,
856862
simulationTimes);
857863
}
858864

859865
if (exercise_ != nullptr) {
860-
regModelOption[counter] =
861-
RegressionModel(*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; });
866+
regModelOption[counter] = RegressionModel(
867+
*t, cashflowInfo, [&cfStatus](std::size_t i) { return cfStatus[i] == CfStatus::done; },
868+
model_->stateProcess()->size());
862869
regModelOption[counter].train(polynomOrder_, polynomType_, pathValueOption, pathValuesRef, simulationTimes);
863870
}
864871

@@ -942,15 +949,6 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
942949

943950
result[0] = RandomVariable(samples, resultValue_);
944951

945-
// create the initial state as a vector of pointers to rvs for interpolating states below
946-
947-
std::vector<RandomVariable> initialState;
948-
std::vector<const RandomVariable*> initialStatePointer;
949-
for (Size j = 0; j < externalModelIndices_.size(); ++j) {
950-
initialState.push_back(RandomVariable(samples, initialState_[j]));
951-
initialStatePointer.push_back(&initialState.back());
952-
}
953-
954952
// if we don't have an exercise, we return the dirty npv of the underlying at all times
955953

956954
if (exerciseTimes_.empty()) {
@@ -960,7 +958,7 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
960958
QL_REQUIRE(ind < exerciseXvaTimes_.size(),
961959
"MultiLegBaseAmcCalculator::simulatePath(): internal error, xva time "
962960
<< t << " not found in exerciseXvaTimes vector.");
963-
result[++counter] = regModelUndDirty_[ind].apply(effPaths, xvaTimes_);
961+
result[++counter] = regModelUndDirty_[ind].apply(initialState_, effPaths, xvaTimes_);
964962
}
965963
return result;
966964
}
@@ -983,8 +981,9 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
983981

984982
// make the exercise decision
985983

986-
RandomVariable exerciseValue = regModelUndExInto_[ind].apply(effPaths, xvaTimes_);
987-
RandomVariable continuationValue = regModelContinuationValue_[ind].apply(effPaths, xvaTimes_);
984+
RandomVariable exerciseValue = regModelUndExInto_[ind].apply(initialState_, effPaths, xvaTimes_);
985+
RandomVariable continuationValue =
986+
regModelContinuationValue_[ind].apply(initialState_, effPaths, xvaTimes_);
988987

989988
exercised_[counter + 1] = !exercised_[counter] && exerciseValue > continuationValue &&
990989
exerciseValue > RandomVariable(samples, 0.0);
@@ -1011,7 +1010,7 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
10111010

10121011
if (xvaTimes_.find(t) != xvaTimes_.end()) {
10131012

1014-
RandomVariable optionValue = regModelOption_[counter].apply(effPaths, xvaTimes_);
1013+
RandomVariable optionValue = regModelOption_[counter].apply(initialState_, effPaths, xvaTimes_);
10151014

10161015
/* Exercise value is "undExInto" if we are in the period between the date on which the exercise happend and
10171016
the next exercise date after that, otherwise it is the full dirty npv. This assumes that two exercise
@@ -1026,9 +1025,9 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
10261025
appropriately adjusted to the coupon periods. The worst that can happen is that the exercised value
10271026
uses the full dirty npv at a too early time. */
10281027

1029-
RandomVariable exercisedValue =
1030-
conditionalResult(exercised_[exerciseCounter], regModelUndExInto_[counter].apply(effPaths, xvaTimes_),
1031-
regModelUndDirty_[counter].apply(effPaths, xvaTimes_));
1028+
RandomVariable exercisedValue = conditionalResult(
1029+
exercised_[exerciseCounter], regModelUndExInto_[counter].apply(initialState_, effPaths, xvaTimes_),
1030+
regModelUndDirty_[counter].apply(initialState_, effPaths, xvaTimes_));
10321031

10331032
if (settlement_ == Settlement::Type::Cash) {
10341033
exercisedValue = applyInverseFilter(exercisedValue, cashExerciseValueWasAccountedForOnXvaTime);
@@ -1048,19 +1047,143 @@ std::vector<QuantExt::RandomVariable> McMultiLegBaseEngine::MultiLegBaseAmcCalcu
10481047

10491048
McMultiLegBaseEngine::RegressionModel::RegressionModel(const Real observationTime,
10501049
const std::vector<CashflowInfo>& cashflowInfo,
1051-
const std::function<bool(std::size_t)>& cashflowRelevant)
1052-
: observationTime_(observationTime) {}
1050+
const std::function<bool(std::size_t)>& cashflowRelevant,
1051+
const Size numberOfModelIndices)
1052+
: observationTime_(observationTime), numberOfModelIndices_(numberOfModelIndices) {
1053+
1054+
// determine (time, modelIndex) pairs to be included in the regressor
1055+
1056+
if (false) { // switch off for the moment
1057+
for (Size i = 0; i < cashflowInfo.size(); ++i) {
1058+
if (!cashflowRelevant(i))
1059+
continue;
1060+
for (Size j = 0; j < cashflowInfo[i].simulationTimes.size(); ++j) {
1061+
Real t = std::min(observationTime_, cashflowInfo[i].simulationTimes[j]);
1062+
for (auto& m : cashflowInfo[i].modelIndices[j]) {
1063+
regressorTimesModelIndices_.insert(std::make_pair(t, m));
1064+
}
1065+
}
1066+
}
1067+
}
1068+
1069+
// we always include the full model state as of the observation time
1070+
1071+
for (Size m = 0; m < numberOfModelIndices_; ++m) {
1072+
regressorTimesModelIndices_.insert(std::make_pair(observationTime, m));
1073+
}
1074+
}
10531075

10541076
void McMultiLegBaseEngine::RegressionModel::train(const Size polynomOrder,
10551077
const LsmBasisSystem::PolynomialType polynomType,
10561078
const RandomVariable& regressand,
10571079
const std::vector<std::vector<const RandomVariable*>>& paths,
1058-
const std::set<Real>& pathTimes, const Filter& filter) {}
1080+
const std::set<Real>& pathTimes, const Filter& filter) {
1081+
1082+
// check if the model is in the correct state
1083+
1084+
QL_REQUIRE(!isTrained_, "McMultiLegBaseEngine::RegressionModel::train(): internal error: model is already trained, "
1085+
"train() should not be called twice on the same model instance.");
1086+
1087+
// build the regressor
1088+
1089+
std::vector<const RandomVariable*> regressor;
1090+
for (auto const& [t, modelIdx] : regressorTimesModelIndices_) {
1091+
auto pt = pathTimes.find(t);
1092+
QL_REQUIRE(pt != pathTimes.end(),
1093+
"McMultiLegBaseEngine::RegressionModel::train(): internal error: did not find regressor time "
1094+
<< t << " in pathTimes.");
1095+
regressor.push_back(paths[std::distance(pathTimes.begin(), pt)][modelIdx]);
1096+
}
1097+
1098+
// get the basis functions
1099+
1100+
basisFns_ = RandomVariableLsmBasisSystem::multiPathBasisSystem(regressor.size(), polynomOrder, polynomType);
1101+
1102+
// compute the regression coefficients
1103+
1104+
regressionCoeffs_ = regressionCoefficients(regressand, regressor, basisFns_, filter);
1105+
1106+
// update state of model
1107+
1108+
isTrained_ = true;
1109+
}
10591110

10601111
RandomVariable
1061-
McMultiLegBaseEngine::RegressionModel::apply(const std::vector<std::vector<const RandomVariable*>>& paths,
1112+
McMultiLegBaseEngine::RegressionModel::apply(const Array& initialState,
1113+
const std::vector<std::vector<const RandomVariable*>>& paths,
10621114
const std::set<Real>& pathTimes) const {
1063-
QL_FAIL("apply not implemented.");
1115+
1116+
// check if model is trained
1117+
1118+
QL_REQUIRE(isTrained_, "McMultiLegBaseEngine::RegressionMdeol::apply(): internal error: model is not trained.");
1119+
1120+
// build initial state pointer
1121+
1122+
QL_REQUIRE(!paths.empty() && !paths.front().empty(),
1123+
"McMultiLegBaseEngine::RegressionMdeol::apply(): paths are empty or have empty first component");
1124+
Size samples = paths.front().front()->size();
1125+
1126+
std::vector<RandomVariable> initialStateValues;
1127+
std::vector<const RandomVariable*> initialStatePointer;
1128+
for (Size j = 0; j < initialState.size(); ++j) {
1129+
initialStateValues.push_back(RandomVariable(samples, initialState[j]));
1130+
initialStatePointer.push_back(&initialStateValues.back());
1131+
}
1132+
1133+
// build the regressor
1134+
1135+
std::vector<const RandomVariable*> regressor;
1136+
std::vector<RandomVariable> tmp;
1137+
1138+
for (auto const& [t, modelIdx] : regressorTimesModelIndices_) {
1139+
auto pt = pathTimes.find(t);
1140+
if (pt != pathTimes.end()) {
1141+
1142+
// the time is a path time, no need to interpolate the path
1143+
1144+
regressor.push_back(paths[std::distance(pathTimes.begin(), pt)][modelIdx]);
1145+
1146+
} else {
1147+
1148+
// the time is not a path time, we need to interpolate:
1149+
// find the sim times and model states before and after the exercise time
1150+
1151+
auto t2 = std::lower_bound(pathTimes.begin(), pathTimes.end(), t);
1152+
1153+
// t is after last path time => flat extrapolation
1154+
1155+
if (t2 == pathTimes.end()) {
1156+
regressor.push_back(paths[pathTimes.size() - 1][modelIdx]);
1157+
continue;
1158+
}
1159+
1160+
// t is before last path time
1161+
1162+
Real time2 = *t2;
1163+
std::vector<const RandomVariable*> s2 = paths[std::distance(pathTimes.begin(), t2)];
1164+
1165+
Real time1;
1166+
std::vector<const RandomVariable*> s1;
1167+
if (t2 == pathTimes.begin()) {
1168+
time1 = 0.0;
1169+
s1 = initialStatePointer;
1170+
} else {
1171+
time1 = *std::next(t2, -1);
1172+
s1 = paths[std::distance(pathTimes.begin(), std::next(t2, -1))];
1173+
}
1174+
1175+
// compute the interpolated state
1176+
1177+
RandomVariable alpha1(samples, (time2 - t) / (time2 - time1));
1178+
RandomVariable alpha2(samples, (t - time1) / (time2 - time1));
1179+
tmp.push_back(alpha1 * *s1[modelIdx] + alpha2 * *s2[modelIdx]);
1180+
regressor.push_back(&tmp.back());
1181+
}
1182+
}
1183+
1184+
// compute result and return it
1185+
1186+
return conditionalExpectation(regressor, basisFns_, regressionCoeffs_);
10641187
}
10651188

10661189
} // namespace QuantExt

QuantExt/qle/pricingengines/mcmultilegbaseengine.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,19 +122,25 @@ class McMultiLegBaseEngine {
122122
public:
123123
RegressionModel() = default;
124124
RegressionModel(const Real observationTime, const std::vector<CashflowInfo>& cashflowInfo,
125-
const std::function<bool(std::size_t)>& cashflowRelevant);
125+
const std::function<bool(std::size_t)>& cashflowRelevant, const Size numberOfModelIndices);
126126
// pathTimes must contain the observation time and the relevant cashflow simulation times
127127
void train(const Size polynomOrder, const LsmBasisSystem::PolynomialType polynomType,
128128
const RandomVariable& regressand, const std::vector<std::vector<const RandomVariable*>>& paths,
129129
const std::set<Real>& pathTimes, const Filter& filter = Filter());
130130
// pathTimes do not need to contain the observation time or the relevant cashflow simulation times
131-
RandomVariable apply(const std::vector<std::vector<const RandomVariable*>>& paths,
131+
RandomVariable apply(const Array& initialState, const std::vector<std::vector<const RandomVariable*>>& paths,
132132
const std::set<Real>& pathTimes) const;
133133

134134
private:
135-
Real observationTime_;
135+
Real observationTime_ = Null<Real>();
136+
Size numberOfModelIndices_ = Null<Size>();
137+
bool isTrained_ = false;
138+
std::set<std::pair<Real, Size>> regressorTimesModelIndices_;
139+
std::vector<std::function<RandomVariable(const std::vector<const RandomVariable*>&)>> basisFns_;
140+
Array regressionCoeffs_;
136141
};
137142

143+
// the implementation of the amc calculator interface used by the amc valuation engine
138144
class MultiLegBaseAmcCalculator : public AmcCalculator {
139145
public:
140146
MultiLegBaseAmcCalculator(

0 commit comments

Comments
 (0)