Skip to content

Commit bb1c4fa

Browse files
committed
sbgemm: cooperlake: prefetch A & B
1 parent 7a2d160 commit bb1c4fa

1 file changed

Lines changed: 13 additions & 6 deletions

File tree

kernel/x86_64/sbgemm_kernel_16x4_cooperlake.c

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6464
/*
 * DECLARE_B_PAIR(): declares two __m512i locals, B_lo and B_hi, in the
 * enclosing scope. These are the destinations BROADCAST_B_PAIR writes to.
 * The expansion is two bare declarations, so it must appear where
 * declarations are permitted.
 */
#define DECLARE_B_PAIR() \
6565
__m512i B_lo; __m512i B_hi;
6666

67+
/*
 * Look-ahead distance used by PREFETCH_B. It is multiplied by 2 to form the
 * byte displacement of the prefetch address.
 * NOTE(review): the "* 2" is presumably the byte size of one element, making
 * this a distance in elements -- confirm against the type of ptr_b.
 */
#define PREFETCH_B_STEP 32
68+
/*
 * PREFETCH_B(Bx, By): issue a prefetcht0 hint relative to pointer ptr_b<Bx>.
 *
 *   By == 0 : address = ptr_b<Bx> + PREFETCH_B_STEP * 2
 *             (base + displacement form)
 *   By != 0 : address = ptr_b<Bx> + n_blksize * (By * 2) + PREFETCH_B_STEP * 2
 *             (base + index * scale + displacement form)
 *
 * By * 2 is emitted as the x86 scale factor, which can only be 1, 2, 4 or 8;
 * By == 0 would give a scale of 0, hence the separate displacement-only form.
 * Both the scale and the displacement use the "n" constraint, so By must be
 * an integer constant expression at the point of expansion.
 *
 * NOTE(review): the else-branch asm is still present in the source when
 * By == 0; this presumably relies on the compiler discarding the constant-
 * false branch before the invalid scale reaches the assembler -- confirm for
 * unoptimized builds.
 * NOTE(review): the macro expands to a bare if/else rather than a
 * do { } while (0) statement, so it is only safe as a standalone statement
 * (as in the visible call sites), not as the body of an unbraced if.
 */
#define PREFETCH_B(Bx, By) \
69+
if (By == 0) asm("prefetcht0 %c1(%0)": : "r"(ptr_b##Bx), "n"(PREFETCH_B_STEP * 2)); \
70+
else asm("prefetcht0 %c3(%0, %1, %c2)": : "r"(ptr_b##Bx), "r"(n_blksize), "n"(By*2), "n"(PREFETCH_B_STEP * 2))
71+
6772
/*
 * BROADCAST_B_PAIR(Bx, By): fills B_lo and B_hi (see DECLARE_B_PAIR) by
 * invoking BROADCAST64 twice on ptr_b<Bx> with n_blksize and By, passing
 * 0 as the fourth argument for B_lo and 4 for B_hi.
 * NOTE(review): BROADCAST64 is defined elsewhere; the fourth argument is
 * presumably an element offset within the row selected by By -- confirm.
 * The expansion already ends in ';', so "BROADCAST_B_PAIR(..);" leaves a
 * harmless empty statement behind.
 */
#define BROADCAST_B_PAIR(Bx, By) \
6873
BROADCAST64(ptr_b##Bx, n_blksize, By, 0, B_lo); \
6974
BROADCAST64(ptr_b##Bx, n_blksize, By, 4, B_hi);
@@ -204,17 +209,19 @@ int CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A, IFLOAT *
204209
k_count = k;
205210
for (; k_count > 3; k_count -=4) {
206211
LOAD_A_PAIR(0);
212+
_mm_prefetch(ptr_a0 + 128, _MM_HINT_T0);
207213
ptr_a0 += 16 * 2;
208-
BROADCAST_B_PAIR(0, 0); MATMUL_4X(0, 0, 0);
209-
BROADCAST_B_PAIR(0, 1); MATMUL_4X(0, 0, 1);
210-
BROADCAST_B_PAIR(0, 2); MATMUL_4X(0, 0, 2);
214+
BROADCAST_B_PAIR(0, 0); PREFETCH_B(0, 0); MATMUL_4X(0, 0, 0);
215+
BROADCAST_B_PAIR(0, 1); PREFETCH_B(0, 1); MATMUL_4X(0, 0, 1);
216+
BROADCAST_B_PAIR(0, 2); PREFETCH_B(0, 2); MATMUL_4X(0, 0, 2);
211217
ptr_b0 += 4 * 2;
212-
BROADCAST_B_PAIR(1, 0); MATMUL_4X(0, 1, 0);
213-
BROADCAST_B_PAIR(1, 1); MATMUL_4X(0, 1, 1);
214-
BROADCAST_B_PAIR(1, 2); MATMUL_4X(0, 1, 2);
218+
BROADCAST_B_PAIR(1, 0); PREFETCH_B(1, 0); MATMUL_4X(0, 1, 0);
219+
BROADCAST_B_PAIR(1, 1); PREFETCH_B(1, 1); MATMUL_4X(0, 1, 1);
220+
BROADCAST_B_PAIR(1, 2); PREFETCH_B(1, 2); MATMUL_4X(0, 1, 2);
215221
ptr_b1 += 4 * 2;
216222

217223
LOAD_A_PAIR(0);
224+
_mm_prefetch(ptr_a0 + 128, _MM_HINT_T0);
218225
ptr_a0 += 16 * 2;
219226
BROADCAST_B_PAIR(0, 0); MATMUL_4X(0, 0, 0);
220227
BROADCAST_B_PAIR(0, 1); MATMUL_4X(0, 0, 1);

0 commit comments

Comments
 (0)