Skip to content

Commit 84453b9

Browse files
committed
Support CONSISTENT_FPCSR on AARCH64
1 parent 667d0e0 commit 84453b9

2 files changed

Lines changed: 16 additions & 0 deletions

File tree

driver/others/blas_server.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -470,9 +470,13 @@ blas_queue_t *tscq;
470470
#endif
471471

472472
#ifdef CONSISTENT_FPCSR
473+
#ifdef __aarch64__
474+
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
475+
#else
473476
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
474477
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
475478
#endif
479+
#endif
476480

477481
#ifdef MONITOR
478482
main_status[cpu] = MAIN_RUNNING1;
@@ -746,9 +750,13 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
746750
queue -> position = pos;
747751

748752
#ifdef CONSISTENT_FPCSR
753+
#ifdef __aarch64__
754+
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue -> sse_mode));
755+
#else
749756
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode));
750757
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
751758
#endif
759+
#endif
752760

753761
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
754762

driver/others/blas_server_omp.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,12 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
284284
sb = queue -> sb;
285285

286286
#ifdef CONSISTENT_FPCSR
287+
#ifdef __aarch64__
288+
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
289+
#else
287290
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
288291
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
292+
#endif
289293
#endif
290294

291295
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
@@ -383,8 +387,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
383387

384388
#ifdef CONSISTENT_FPCSR
385389
for (i = 0; i < num; i ++) {
390+
#ifdef __aarch64__
391+
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode));
392+
#else
386393
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode));
387394
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode));
395+
#endif
388396
}
389397
#endif
390398

0 commit comments

Comments
 (0)