@@ -233,9 +233,7 @@ class OpenClContext : public ComputeContext {
233233 // 2a indexed by var id
234234 std::vector<std::size_t > inputVarOffset_;
235235 std::vector<bool > inputVarIsScalar_;
236- std::vector<float > inputVarValue_;
237- std::vector<float *> inputVarPtr_;
238- std::vector<std::vector<float >> inputVarPtrVal_;
236+ std::vector<float > inputVarValues_;
239237
240238 // 2b collection of variable ids
241239 std::vector<std::size_t > freedVariables_;
@@ -424,9 +422,7 @@ std::pair<std::size_t, bool> OpenClContext::initiateCalculation(const std::size_
424422
425423 inputVarOffset_.clear ();
426424 inputVarIsScalar_.clear ();
427- inputVarValue_.clear ();
428- inputVarPtr_.clear ();
429- inputVarPtrVal_.clear ();
425+ inputVarValues_.clear ();
430426
431427 if (newCalc) {
432428 freedVariables_.clear ();
@@ -458,10 +454,8 @@ std::size_t OpenClContext::createInputVariable(double v) {
458454 }
459455 inputVarOffset_.push_back (nextOffset);
460456 inputVarIsScalar_.push_back (true );
461- inputVarValue_.push_back ((float )std::max (std::min (v, (double )std::numeric_limits<float >::max ()),
462- -(double )std::numeric_limits<float >::max ()));
463- inputVarPtr_.push_back (nullptr );
464- inputVarPtrVal_.push_back ({});
457+ inputVarValues_.push_back ((float )std::max (std::min (v, (double )std::numeric_limits<float >::max ()),
458+ -(double )std::numeric_limits<float >::max ()));
465459 return nVars_++;
466460}
467461
@@ -475,13 +469,9 @@ std::size_t OpenClContext::createInputVariable(double* v) {
475469 }
476470 inputVarOffset_.push_back (nextOffset);
477471 inputVarIsScalar_.push_back (false );
478- inputVarValue_.push_back (0 .0f );
479- inputVarPtrVal_.push_back (std::vector<float >(size_[currentId_-1 ]));
480472 for (std::size_t i = 0 ; i < size_[currentId_ - 1 ]; ++i)
481- v[i] = (float )std::max (std::min (v[i], (double )std::numeric_limits<float >::max ()),
482- -(double )std::numeric_limits<float >::max ());
483- std::copy (v, v + size_[currentId_ - 1 ], inputVarPtrVal_.back ().begin ());
484- inputVarPtr_.push_back (&inputVarPtrVal_.back ()[0 ]);
473+ inputVarValues_.push_back ((float )std::max (std::min (v[i], (double )std::numeric_limits<float >::max ()),
474+ -(double )std::numeric_limits<float >::max ()));
485475 return nVars_++;
486476}
487477
@@ -860,17 +850,12 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
860850 timerBase = timer.elapsed ().wall ;
861851 }
862852
863- std::vector< cl_event> inputBufferEvents ;
853+ cl_event inputBufferEvent ;
864854 if (inputBufferSize > 0 ) {
865- for (std::size_t i = 0 ; i < inputVarOffset_.size (); ++i) {
866- inputBufferEvents.push_back (cl_event ());
867- err = clEnqueueWriteBuffer (queue_, inputBuffer, CL_FALSE, sizeof (float ) * inputVarOffset_[i],
868- sizeof (float ) * (inputVarIsScalar_[i] ? 1 : size_[currentId_ - 1 ]),
869- inputVarIsScalar_[i] ? &inputVarValue_[i] : inputVarPtr_[i], 0 , NULL ,
870- &inputBufferEvents.back ());
871- QL_REQUIRE (err == CL_SUCCESS,
872- " OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText (err));
873- }
855+ err = clEnqueueWriteBuffer (queue_, inputBuffer, CL_FALSE, 0 , sizeof (float ) * inputBufferSize,
856+ &inputVarValues_[0 ], 0 , NULL , &inputBufferEvent);
857+ QL_REQUIRE (err == CL_SUCCESS,
858+ " OpenClContext::finalizeCalculation(): writing to input buffer fails: " << errorText (err));
874859 }
875860
876861 if (debug_) {
@@ -895,13 +880,14 @@ void OpenClContext::finalizeCalculation(std::vector<double*>& output, const Sett
895880 // execute kernel
896881
897882 if (debug_) {
883+ err = clFinish (queue_);
898884 timerBase = timer.elapsed ().wall ;
899885 }
900886
901887 cl_event runEvent;
902- err = clEnqueueNDRangeKernel (queue_, kernel_[currentId_ - 1 ], 1 , NULL , &size_[currentId_ - 1 ], NULL ,
903- inputBufferEvents. size (), inputBufferEvents. empty () ? nullptr : &inputBufferEvents[ 0 ] ,
904- &runEvent);
888+ err =
889+ clEnqueueNDRangeKernel (queue_, kernel_[currentId_ - 1 ], 1 , NULL , &size_[currentId_ - 1 ], NULL ,
890+ inputBufferSize > 0 ? 1 : 0 , inputBufferSize > 0 ? &inputBufferEvent : NULL , &runEvent);
905891 QL_REQUIRE (err == CL_SUCCESS, " OpenClContext::finalizeCalculation(): enqueue kernel fails: " << errorText (err));
906892
907893 if (debug_) {
0 commit comments