Skip to content

Commit c4b464c

Browse files
authored
Merge pull request #3273 from austinpagan/sbgemm_gcc10_fix
Power10: Fix for SBGEMM
2 parents baf03a0 + e6dd44d commit c4b464c

1 file changed

Lines changed: 29 additions & 5 deletions

File tree

kernel/power/sbgemm_kernel_power10.c

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,30 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
9898
rowC = (v2sf_t *) &CO[7* ldc+J]; \
9999
rowC[0] += result[6] * alpha;
100100

101+
/*
 * SAVE4x2_ACC_SCALAR(ACC): unpack the MMA accumulator ACC into result[0..3]
 * with __builtin_mma_disassemble_acc, scale each of the four vectors by alpha
 * into res[0..3], then accumulate only element 0 of each scaled vector into
 * CO[0 * ldc] .. CO[3 * ldc] (one scalar per ldc stride).
 *
 * Not self-contained: result, res, alpha, CO and ldc are taken from the scope
 * where the macro is expanded. The body is a bare { ... } block, so it is
 * meant to be used as a statement.
 */
#define SAVE4x2_ACC_SCALAR(ACC) { \
102+
__builtin_mma_disassemble_acc ((void *)result, ACC); \
103+
res[0] = result[0] * alpha; \
104+
res[1] = result[1] * alpha; \
105+
res[2] = result[2] * alpha; \
106+
res[3] = result[3] * alpha; \
107+
CO[0 * ldc] += res[0][0]; \
108+
CO[1 * ldc] += res[1][0]; \
109+
CO[2 * ldc] += res[2][0]; \
110+
CO[3 * ldc] += res[3][0]; \
111+
}
112+
113+
/*
 * SAVE4x2_ACC1_SCALAR(ACC): unpack the MMA accumulator ACC into result[0..3]
 * with __builtin_mma_disassemble_acc, scale each of the four vectors by alpha
 * into res[0..3], then accumulate only element 0 of each scaled vector into
 * CO[4 * ldc] .. CO[7 * ldc] (one scalar per ldc stride, offsets 4 through 7).
 *
 * Not self-contained: result, res, alpha, CO and ldc are taken from the scope
 * where the macro is expanded. The body is a bare { ... } block, so it is
 * meant to be used as a statement.
 */
#define SAVE4x2_ACC1_SCALAR(ACC) { \
114+
__builtin_mma_disassemble_acc ((void *)result, ACC); \
115+
res[0] = result[0] * alpha; \
116+
res[1] = result[1] * alpha; \
117+
res[2] = result[2] * alpha; \
118+
res[3] = result[3] * alpha; \
119+
CO[4 * ldc] += res[0][0]; \
120+
CO[5 * ldc] += res[1][0]; \
121+
CO[6 * ldc] += res[2][0]; \
122+
CO[7 * ldc] += res[3][0]; \
123+
}
124+
101125
#define MMA __builtin_mma_xvbf16ger2pp
102126

103127
#define SAVE2x4_ACC(ACC, J) \
@@ -313,7 +337,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
313337
{
314338
IFLOAT *BO = B;
315339
v2sf_t *rowC;
316-
v2sf_t result[8];
340+
v4sf_t result[4], res[4];
317341
__vector_quad acc0, acc1;
318342
__builtin_mma_xxsetaccz (&acc0);
319343
__builtin_mma_xxsetaccz (&acc1);
@@ -335,8 +359,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
335359
MMA (&acc0, MERGE_HIGH (rowB[0], vzero), (vec_t) rowA);
336360
MMA (&acc1, MERGE_LOW (rowB[0], vzero), (vec_t) rowA);
337361
}
338-
SAVE4x2_ACC (&acc0, 0);
339-
SAVE4x2_ACC1 (&acc1, 0);
362+
SAVE4x2_ACC_SCALAR (&acc0);
363+
SAVE4x2_ACC1_SCALAR (&acc1);
340364
CO += 1;
341365
AO += k;
342366
BO += (k << 3);
@@ -547,7 +571,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
547571
{
548572
IFLOAT *BO = B;
549573
v2sf_t *rowC;
550-
v2sf_t result[8];
574+
v4sf_t result[4], res[4];
551575
__vector_quad acc0;
552576
BLASLONG l = 0;
553577
__builtin_mma_xxsetaccz (&acc0);
@@ -571,7 +595,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
571595
};
572596
MMA (&acc0, (vec_t)(rowB_mrg), (vec_t) rowA);
573597
}
574-
SAVE4x2_ACC (&acc0, 0);
598+
SAVE4x2_ACC_SCALAR (&acc0);
575599
AO += k;
576600
BO += (k << 2);
577601
CO += 1;

0 commit comments

Comments
 (0)