Skip to content

Commit caf407f

Browse files
pcaspersjenkins
authored and committed
QPR-12384 streamline input data copy host-to-device
1 parent ea6532e commit caf407f

1 file changed

Lines changed: 15 additions & 29 deletions

File tree

QuantExt/qle/math/openclenvironment.cpp

Lines changed: 15 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -233,9 +233,7 @@ class OpenClContext : public ComputeContext {
233233
// 2a indexed by var id
234234
std::vector<std::size_t> inputVarOffset_;
235235
std::vector<bool> inputVarIsScalar_;
236-
std::vector<float> inputVarValue_;
237-
std::vector<float*> inputVarPtr_;
238-
std::vector<std::vector<float>> inputVarPtrVal_;
236+
std::vector<float> inputVarValues_;
239237

240238
// 2b collection of variable ids
241239
std::vector<std::size_t> freedVariables_;
@@ -424,9 +422,7 @@ std::pair<std::size_t, bool> OpenClContext::initiateCalculation(const std::size_
424422

425423
inputVarOffset_.clear();
426424
inputVarIsScalar_.clear();
427-
inputVarValue_.clear();
428-
inputVarPtr_.clear();
429-
inputVarPtrVal_.clear();
425+
inputVarValues_.clear();
430426

431427
if (newCalc) {
432428
freedVariables_.clear();
@@ -458,10 +454,8 @@ std::size_t OpenClContext::createInputVariable(double v) {
458454
}
459455
inputVarOffset_.push_back(nextOffset);
460456
inputVarIsScalar_.push_back(true);
461-
inputVarValue_.push_back((float)std::max(std::min(v, (double)std::numeric_limits<float>::max()),
462-
-(double)std::numeric_limits<float>::max()));
463-
inputVarPtr_.push_back(nullptr);
464-
inputVarPtrVal_.push_back({});
457+
inputVarValues_.push_back((float)std::max(std::min(v, (double)std::numeric_limits<float>::max()),
458+
-(double)std::numeric_limits<float>::max()));
465459
return nVars_++;
466460
}
467461

@@ -475,13 +469,9 @@ std::size_t OpenClContext::createInputVariable(double* v) {
475469
}
476470
inputVarOffset_.push_back(nextOffset);
477471
inputVarIsScalar_.push_back(false);
478-
inputVarValue_.push_back(0.0f);
479-
inputVarPtrVal_.push_back(std::vector<float>(size_[currentId_-1]));
480472
for (std::size_t i = 0; i < size_[currentId_ - 1]; ++i)
481-
v[i] = (float)std::max(std::min(v[i], (double)std::numeric_limits<float>::max()),
482-
-(double)std::numeric_limits<float>::max());
483-
std::copy(v, v + size_[currentId_ - 1], inputVarPtrVal_.back().begin());
484-
inputVarPtr_.push_back(&inputVarPtrVal_.back()[0]);
473+
inputVarValues_.push_back((float)std::max(std::min(v[i], (double)std::numeric_limits<float>::max()),
474+
-(double)std::numeric_limits<float>::max()));
485475
return nVars_++;
486476
}
487477

@@ -860,17 +850,12 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
860850
timerBase = timer.elapsed().wall;
861851
}
862852

863-
std::vector<cl_event> inputBufferEvents;
853+
cl_event inputBufferEvent;
864854
if (inputBufferSize > 0) {
865-
for (std::size_t i = 0; i < inputVarOffset_.size(); ++i) {
866-
inputBufferEvents.push_back(cl_event());
867-
err = clEnqueueWriteBuffer(queue_, inputBuffer, CL_FALSE, sizeof(float) * inputVarOffset_[i],
868-
sizeof(float) * (inputVarIsScalar_[i] ? 1 : size_[currentId_ - 1]),
869-
inputVarIsScalar_[i] ? &inputVarValue_[i] : inputVarPtr_[i], 0, NULL,
870-
&inputBufferEvents.back());
871-
QL_REQUIRE(err == CL_SUCCESS,
872-
"OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText(err));
873-
}
855+
err = clEnqueueWriteBuffer(queue_, inputBuffer, CL_FALSE, 0, sizeof(float) * inputBufferSize,
856+
&inputVarValues_[0], 0, NULL, &inputBufferEvent);
857+
QL_REQUIRE(err == CL_SUCCESS,
858+
"OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText(err));
874859
}
875860

876861
if (debug_) {
@@ -895,13 +880,14 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
895880
// execute kernel
896881

897882
if (debug_) {
883+
err = clFinish(queue_);
898884
timerBase = timer.elapsed().wall;
899885
}
900886

901887
cl_event runEvent;
902-
err = clEnqueueNDRangeKernel(queue_, kernel_[currentId_ - 1], 1, NULL, &size_[currentId_ - 1], NULL,
903-
inputBufferEvents.size(), inputBufferEvents.empty() ? nullptr : &inputBufferEvents[0],
904-
&runEvent);
888+
err =
889+
clEnqueueNDRangeKernel(queue_, kernel_[currentId_ - 1], 1, NULL, &size_[currentId_ - 1], NULL,
890+
inputBufferSize > 0 ? 1 : 0, inputBufferSize > 0 ? &inputBufferEvent : NULL, &runEvent);
905891
QL_REQUIRE(err == CL_SUCCESS, "OpenClContext::finalizeCalculation(): enqueue kernel fails: " << errorText(err));
906892

907893
if (debug_) {

0 commit comments

Comments
 (0)