@@ -38,10 +38,10 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1)
3838
3939 __m256i abs_mask = _mm256_set1_epi64x (0x7fffffffffffffff );
4040 for (i = 0 ; i < tail_index_AVX2 ; i += 16 ) {
41- accum_0 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (& x1 [i + 0 ]), abs_mask );
42- accum_1 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (& x1 [i + 4 ]), abs_mask );
43- accum_2 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (& x1 [i + 8 ]), abs_mask );
44- accum_3 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (& x1 [i + 12 ]), abs_mask );
41+ accum_0 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (( __m256i * ) & x1 [i + 0 ]), abs_mask );
42+ accum_1 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (( __m256i * ) & x1 [i + 4 ]), abs_mask );
43+ accum_2 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (( __m256i * ) & x1 [i + 8 ]), abs_mask );
44+ accum_3 += (__m256d )_mm256_and_si256 (_mm256_load_si256 (( __m256i * ) & x1 [i + 12 ]), abs_mask );
4545 }
4646
4747 accum_0 = accum_0 + accum_1 + accum_2 + accum_3 ;
@@ -63,10 +63,10 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1)
6363
6464 __m128i abs_mask2 = _mm_set1_epi64x (0x7fffffffffffffff );
6565 for (i = tail_index_AVX2 ; i < tail_index_SSE ; i += 8 ) {
66- accum_20 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (& x1 [i + 0 ]), abs_mask2 );
67- accum_21 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (& x1 [i + 2 ]), abs_mask2 );
68- accum_22 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (& x1 [i + 4 ]), abs_mask2 );
69- accum_23 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (& x1 [i + 6 ]), abs_mask2 );
66+ accum_20 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (( __m128i * ) & x1 [i + 0 ]), abs_mask2 );
67+ accum_21 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (( __m128i * ) & x1 [i + 2 ]), abs_mask2 );
68+ accum_22 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (( __m128i * ) & x1 [i + 4 ]), abs_mask2 );
69+ accum_23 += (__m128d )_mm_and_si128 (_mm_loadu_si128 (( __m128i * ) & x1 [i + 6 ]), abs_mask2 );
7070 }
7171
7272 accum_20 = accum_20 + accum_21 + accum_22 + accum_23 ;
0 commit comments