@@ -36,7 +36,6 @@ void test(queue &Q, int M, int N, int K)
3636 auto A = malloc_device<T>(lda * K, Q);
3737 auto B = malloc_device<T>(ldb * N, Q);
3838 auto C = malloc_device<T>(ldc * N, Q);
39- auto flag = malloc_shared<int >(1 , Q);
4039
4140 constexpr int rd_size = 1048576 ;
4241 auto host_data = malloc_host<T>(rd_size, Q);
@@ -59,36 +58,23 @@ void test(queue &Q, int M, int N, int K)
5958 replicate_data (Q, B, ldb * N, host_data, rd_size);
6059
6160 /* Verify that the leading entries of C are correct */
62- std::cout << " -> Verification...\n " ;
61+ std::cout << " -> Verification..." ;
6362 (void ) time_gemms (1 );
6463 size_t elems = std::min (ldc * N, rd_size);
65- Q.copy (C, host_data, elems);
66- flag[ 0 ] = 0 ;
64+ Q.copy (C, host_data, elems). wait () ;
65+ bool ok = true ;
6766 int linear_id = 0 ;
6867 for (size_t j = 0 ; j < N; j++) {
6968 for (size_t i = 0 ; i < M; i++) {
7069 linear_id = j*ldc + i;
7170 if (linear_id >= elems) break ;
7271 if (host_data[linear_id] != T (K)) {
73- flag[ 0 ] = 1 ;
72+ ok = false ;
7473 }
7574 }
7675 if (linear_id >= elems) break ;
7776 }
78- /*
79- for (size_t i = 0; i < elems; i++) {
80- int count = 0;
81- if (host_data[i] != T(K)) {
82- flag[0] = 1;
83- if (count < 10) {
84- sycl::ext::oneapi::experimental::printf("error elem %d expect %f got %f\n",
85- i, T(K), host_data[i]);
86- count++;
87- }
88- }
89- }
90- */
91- std::cout << " verification " << (flag[0 ] == 0 ? " passes." : " FAILS!" ) << std::endl;
77+ std::cout << (ok ? " passes." : " FAILS!" ) << std::endl;
9278
9379 /* Fill A/B with random data */
9480 generate_random_data (rd_size, host_data);
@@ -131,7 +117,6 @@ void test(queue &Q, int M, int N, int K)
131117 free (A, Q);
132118 free (B, Q);
133119 free (C, Q);
134- free (flag, Q);
135120 free (host_data, Q);
136121}
137122
0 commit comments