@@ -233,9 +233,7 @@ class OpenClContext : public ComputeContext {
233233 // 2a indexed by var id
234234 std::vector<std::size_t > inputVarOffset_;
235235 std::vector<bool > inputVarIsScalar_;
236- std::vector<float > inputVarValue_;
237- std::vector<float *> inputVarPtr_;
238- std::vector<std::vector<float >> inputVarPtrVal_;
236+ std::vector<float > inputVarValues_;
239237
240238 // 2b collection of variable ids
241239 std::vector<std::size_t > freedVariables_;
@@ -424,9 +422,7 @@ std::pair<std::size_t, bool> OpenClContext::initiateCalculation(const std::size_
424422
425423 inputVarOffset_.clear ();
426424 inputVarIsScalar_.clear ();
427- inputVarValue_.clear ();
428- inputVarPtr_.clear ();
429- inputVarPtrVal_.clear ();
425+ inputVarValues_.clear ();
430426
431427 if (newCalc) {
432428 freedVariables_.clear ();
@@ -458,9 +454,8 @@ std::size_t OpenClContext::createInputVariable(double v) {
458454 }
459455 inputVarOffset_.push_back (nextOffset);
460456 inputVarIsScalar_.push_back (true );
461- inputVarValue_.push_back ((float )v);
462- inputVarPtr_.push_back (nullptr );
463- inputVarPtrVal_.push_back ({});
457+ inputVarValues_.push_back ((float )std::max (std::min (v, (double )std::numeric_limits<float >::max ()),
458+ -(double )std::numeric_limits<float >::max ()));
464459 return nVars_++;
465460}
466461
@@ -474,10 +469,9 @@ std::size_t OpenClContext::createInputVariable(double* v) {
474469 }
475470 inputVarOffset_.push_back (nextOffset);
476471 inputVarIsScalar_.push_back (false );
477- inputVarValue_.push_back (0 .0f );
478- inputVarPtrVal_.push_back (std::vector<float >(size_[currentId_-1 ]));
479- std::copy (v, v + size_[currentId_ - 1 ], inputVarPtrVal_.back ().begin ());
480- inputVarPtr_.push_back (&inputVarPtrVal_.back ()[0 ]);
472+ for (std::size_t i = 0 ; i < size_[currentId_ - 1 ]; ++i)
473+ inputVarValues_.push_back ((float )std::max (std::min (v[i], (double )std::numeric_limits<float >::max ()),
474+ -(double )std::numeric_limits<float >::max ()));
481475 return nVars_++;
482476}
483477
@@ -856,17 +850,12 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
856850 timerBase = timer.elapsed ().wall ;
857851 }
858852
859- std::vector< cl_event> inputBufferEvents ;
853+ cl_event inputBufferEvent ;
860854 if (inputBufferSize > 0 ) {
861- for (std::size_t i = 0 ; i < inputVarOffset_.size (); ++i) {
862- inputBufferEvents.push_back (cl_event ());
863- err = clEnqueueWriteBuffer (queue_, inputBuffer, CL_FALSE, sizeof (float ) * inputVarOffset_[i],
864- sizeof (float ) * (inputVarIsScalar_[i] ? 1 : size_[currentId_ - 1 ]),
865- inputVarIsScalar_[i] ? &inputVarValue_[i] : inputVarPtr_[i], 0 , NULL ,
866- &inputBufferEvents.back ());
867- QL_REQUIRE (err == CL_SUCCESS,
868- " OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText (err));
869- }
855+ err = clEnqueueWriteBuffer (queue_, inputBuffer, CL_FALSE, 0 , sizeof (float ) * inputBufferSize,
856+ &inputVarValues_[0 ], 0 , NULL , &inputBufferEvent);
857+ QL_REQUIRE (err == CL_SUCCESS,
858+ " OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText (err));
870859 }
871860
872861 if (debug_) {
@@ -891,13 +880,14 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
891880 // execute kernel
892881
893882 if (debug_) {
883+ err = clFinish (queue_);
894884 timerBase = timer.elapsed ().wall ;
895885 }
896886
897887 cl_event runEvent;
898- err = clEnqueueNDRangeKernel (queue_, kernel_[currentId_ - 1 ], 1 , NULL , &size_[currentId_ - 1 ], NULL ,
899- inputBufferEvents. size (), inputBufferEvents. empty () ? nullptr : &inputBufferEvents[ 0 ] ,
900- &runEvent);
888+ err =
889+ clEnqueueNDRangeKernel (queue_, kernel_[currentId_ - 1 ], 1 , NULL , &size_[currentId_ - 1 ], NULL ,
890+ inputBufferSize > 0 ? 1 : 0 , inputBufferSize > 0 ? &inputBufferEvent : NULL , &runEvent);
901891 QL_REQUIRE (err == CL_SUCCESS, " OpenClContext::finalizeCalculation(): enqueue kernel fails: " << errorText (err));
902892
903893 if (debug_) {
0 commit comments