Skip to content

Commit d1ee2e9

Browse files
authored
Merge pull request #3375 from martin-frbg/issue3369
Add casts to eliminate compiler warnings for Haswell sasum/dasum
2 parents 3e8c448 + d172385 commit d1ee2e9

2 files changed

Lines changed: 14 additions & 14 deletions

File tree

kernel/x86_64/dasum_microk_haswell-2.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1)
3838

3939
__m256i abs_mask = _mm256_set1_epi64x(0x7fffffffffffffff);
4040
for (i = 0; i < tail_index_AVX2; i += 16) {
41-
accum_0 += (__m256d)_mm256_and_si256(_mm256_load_si256(&x1[i+ 0]), abs_mask);
42-
accum_1 += (__m256d)_mm256_and_si256(_mm256_load_si256(&x1[i+ 4]), abs_mask);
43-
accum_2 += (__m256d)_mm256_and_si256(_mm256_load_si256(&x1[i+ 8]), abs_mask);
44-
accum_3 += (__m256d)_mm256_and_si256(_mm256_load_si256(&x1[i+12]), abs_mask);
41+
accum_0 += (__m256d)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+ 0]), abs_mask);
42+
accum_1 += (__m256d)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+ 4]), abs_mask);
43+
accum_2 += (__m256d)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+ 8]), abs_mask);
44+
accum_3 += (__m256d)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+12]), abs_mask);
4545
}
4646

4747
accum_0 = accum_0 + accum_1 + accum_2 + accum_3;
@@ -63,10 +63,10 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1)
6363

6464
__m128i abs_mask2 = _mm_set1_epi64x(0x7fffffffffffffff);
6565
for (i = tail_index_AVX2; i < tail_index_SSE; i += 8) {
66-
accum_20 += (__m128d)_mm_and_si128(_mm_loadu_si128(&x1[i + 0]), abs_mask2);
67-
accum_21 += (__m128d)_mm_and_si128(_mm_loadu_si128(&x1[i + 2]), abs_mask2);
68-
accum_22 += (__m128d)_mm_and_si128(_mm_loadu_si128(&x1[i + 4]), abs_mask2);
69-
accum_23 += (__m128d)_mm_and_si128(_mm_loadu_si128(&x1[i + 6]), abs_mask2);
66+
accum_20 += (__m128d)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 0]), abs_mask2);
67+
accum_21 += (__m128d)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 2]), abs_mask2);
68+
accum_22 += (__m128d)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 4]), abs_mask2);
69+
accum_23 += (__m128d)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 6]), abs_mask2);
7070
}
7171

7272
accum_20 = accum_20 + accum_21 + accum_22 + accum_23;

kernel/x86_64/sasum_microk_haswell-2.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1)
3838

3939
__m256i abs_mask = _mm256_set1_epi32(0x7fffffff);
4040
for (i = 0; i < tail_index_AVX2; i += 32) {
41-
accum_0 += (__m256)_mm256_and_si256(_mm256_load_si256(&x1[i+ 0]), abs_mask);
42-
accum_1 += (__m256)_mm256_and_si256(_mm256_load_si256(&x1[i+ 8]), abs_mask);
43-
accum_2 += (__m256)_mm256_and_si256(_mm256_load_si256(&x1[i+16]), abs_mask);
44-
accum_3 += (__m256)_mm256_and_si256(_mm256_load_si256(&x1[i+24]), abs_mask);
41+
accum_0 += (__m256)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+ 0]), abs_mask);
42+
accum_1 += (__m256)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+ 8]), abs_mask);
43+
accum_2 += (__m256)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+16]), abs_mask);
44+
accum_3 += (__m256)_mm256_and_si256(_mm256_load_si256((__m256i*)&x1[i+24]), abs_mask);
4545
}
4646

4747
accum_0 = accum_0 + accum_1 + accum_2 + accum_3;
@@ -62,8 +62,8 @@ static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1)
6262

6363
__m128i abs_mask2 = _mm_set1_epi32(0x7fffffff);
6464
for (i = tail_index_AVX2; i < tail_index_SSE; i += 8) {
65-
accum_20 += (__m128)_mm_and_si128(_mm_loadu_si128(&x1[i + 0]), abs_mask2);
66-
accum_21 += (__m128)_mm_and_si128(_mm_loadu_si128(&x1[i + 4]), abs_mask2);
65+
accum_20 += (__m128)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 0]), abs_mask2);
66+
accum_21 += (__m128)_mm_and_si128(_mm_loadu_si128((__m128i*)&x1[i + 4]), abs_mask2);
6767
}
6868

6969
accum_20 += accum_21;

0 commit comments

Comments
 (0)