@@ -649,7 +649,8 @@ std::size_t OpenClContext::createInputVariable(double* v) {
649649void OpenClContext::updateVariatesPool () {
650650 QL_REQUIRE (nVariates_ > 0 , " OpenClContext::updateVariatesPool(): internal error, got nVariates_ == 0." );
651651
652- constexpr std::size_t mt_N = 624 ; // mersenne twister N
652+ constexpr std::size_t size_one = 1 ; // constant 1
653+ constexpr std::size_t mt_N = 624 ; // mersenne twister N
653654
654655 std::size_t fpSize = settings_.useDoublePrecision ? sizeof (double ) : sizeof (float );
655656
@@ -660,6 +661,7 @@ void OpenClContext::updateVariatesPool() {
660661
661662 std::string fpTypeStr = settings_.useDoublePrecision ? " double" : " float" ;
662663 std::string fpSuffix = settings_.useDoublePrecision ? " " : " f" ;
664+ std::string fpMaxValue = settings_.useDoublePrecision ? " 0x1.fffffffffffffp1023" : " 0x1.fffffep127f" ;
663665
664666 // clang-format off
665667 // ported from QuantLib::InverseCumulativeNormal
@@ -687,12 +689,12 @@ void OpenClContext::updateVariatesPool() {
687689 " const " + fpTypeStr + " d4_ = 3.754408661907416e+00" + fpSuffix + " ;\n "
688690 " const " + fpTypeStr + " x_low_ = 0.02425" + fpSuffix + " ;\n "
689691 " const " + fpTypeStr + " x_high_ = 1.0" + fpSuffix + " - x_low_;\n "
690- " const " + fpTypeStr + " x = x0 / ( " + fpTypeStr + " )UINT_MAX ;\n "
692+ " const " + fpTypeStr + " x = (( " + fpTypeStr + " ) x0 + 0.5 " + fpSuffix + " ) / 4294967296.0 " + fpSuffix + " ;\n "
691693 " if (x < x_low_ || x_high_ < x) {\n "
692694 " if (x0 == UINT_MAX) {\n "
693- " return 0x1.fffffep127 " + fpSuffix + " ;\n "
695+ " return " + fpMaxValue + " ;\n "
694696 " } else if(x0 == 0) {\n "
695- " return -0x1.fffffep127 " + fpSuffix + " ;\n "
697+ " return -" + fpMaxValue + " ;\n "
696698 " }\n "
697699 " " + fpTypeStr + " z;\n "
698700 " if (x < x_low_) {\n "
@@ -718,7 +720,7 @@ void OpenClContext::updateVariatesPool() {
718720
719721 std::string kernelSourceSeedInit = " __kernel void ore_seedInitialization(const ulong s, __global ulong* mt) {\n "
720722 " const ulong N = 624;\n "
721- " mt[0]= s & 0xffffffffU ;\n "
723+ " mt[0]= s & 0xffffffffUL ;\n "
722724 " for (ulong mti=1; mti<N; ++mti) {\n "
723725 " mt[mti] = (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);\n "
724726 " mt[mti] &= 0xffffffffUL;\n "
@@ -751,8 +753,8 @@ void OpenClContext::updateVariatesPool() {
751753 " ulong mti = get_global_id(0);\n "
752754 " ulong y = mt[mti];\n "
753755 " y ^= (y >> 11);\n "
754- " y ^= (y << 7) & 0x9d2c5680U ;\n "
755- " y ^= (y << 15) & 0xefc60000U ;\n "
756+ " y ^= (y << 7) & 0x9d2c5680UL ;\n "
757+ " y ^= (y << 15) & 0xefc60000UL ;\n "
756758 " y ^= (y >> 18);\n "
757759 " output[offset + mti] = ore_invCumN((uint)y);\n "
758760 " }\n\n " ;
@@ -798,8 +800,7 @@ void OpenClContext::updateVariatesPool() {
798800 QL_REQUIRE (err == CL_SUCCESS,
799801 " OpenClContext::updateVariatesPool(): error setting kernel args seed init: " << errorText (err));
800802
801- constexpr std::size_t sizeOne = 1 ;
802- err = clEnqueueNDRangeKernel (queue_, variatesKernelSeedInit_, 1 , NULL , &sizeOne, NULL , 0 , NULL , &initEvent);
803+ err = clEnqueueNDRangeKernel (queue_, variatesKernelSeedInit_, 1 , NULL , &size_one, NULL , 0 , NULL , &initEvent);
803804 QL_REQUIRE (err == CL_SUCCESS,
804805 " OpenClContext::updateVariatesPool(): error running kernel seed init: " << errorText (err));
805806 }
@@ -855,7 +856,7 @@ void OpenClContext::updateVariatesPool() {
855856 " OpenClContext::updateVariatesPool(): error setting args for kernel twist: " << errorText (err));
856857 cl_event twistEvent;
857858 err = clEnqueueNDRangeKernel (
858- queue_, variatesKernelTwist_, 1 , NULL , &mt_N , NULL , variatesPoolSize_ == 0 || haveGenerated ? 1 : 0 ,
859+ queue_, variatesKernelTwist_, 1 , NULL , &size_one , NULL , variatesPoolSize_ == 0 || haveGenerated ? 1 : 0 ,
859860 variatesPoolSize_ == 0 ? &initEvent : (haveGenerated ? &generateEvent : NULL ), &twistEvent);
860861 QL_REQUIRE (err == CL_SUCCESS,
861862 " OpenClContext::updateVariatesPool(): error running kernel twist: " << errorText (err));
0 commit comments