Skip to content

Commit 66bde62

Browse files
authored
Merge pull request #4503 from shivammonaka/OpenMP-Locks
OpenMP locks instead of busy-waiting with NUM_PARALLEL
2 parents dc0338a + d49ebc5 commit 66bde62

2 files changed

Lines changed: 62 additions & 20 deletions

File tree

driver/level3/level3_thread.c

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -545,13 +545,31 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
545545
*range_n, IFLOAT *sa, IFLOAT *sb,
546546
BLASLONG nthreads_m, BLASLONG nthreads_n) {
547547

548-
#ifndef USE_OPENMP
549-
#ifndef OS_WINDOWS
550-
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
548+
#ifdef USE_OPENMP
549+
static omp_lock_t level3_lock, critical_section_lock;
550+
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
551+
parallel_section_left = MAX_PARALLEL_NUMBER;
552+
553+
// Lock initialization; Todo : Maybe this part can be moved to blas_init() in blas_server_omp.c
554+
while(omp_lock_initialized == 0)
555+
{
556+
blas_lock(&init_lock);
557+
{
558+
if(omp_lock_initialized == 0)
559+
{
560+
omp_init_lock(&level3_lock);
561+
omp_init_lock(&critical_section_lock);
562+
omp_lock_initialized = 1;
563+
WMB;
564+
}
565+
blas_unlock(&init_lock);
566+
}
567+
}
568+
#elif defined(OS_WINDOWS)
569+
CRITICAL_SECTION level3_lock;
570+
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
551571
#else
552-
CRITICAL_SECTION level3_lock;
553-
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
554-
#endif
572+
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
555573
#endif
556574

557575
blas_arg_t newarg;
@@ -599,12 +617,28 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
599617
#endif
600618
#endif
601619

602-
#ifndef USE_OPENMP
603-
#ifndef OS_WINDOWS
604-
pthread_mutex_lock(&level3_lock);
620+
#ifdef USE_OPENMP
621+
omp_set_lock(&level3_lock);
622+
omp_set_lock(&critical_section_lock);
623+
624+
parallel_section_left--;
625+
626+
/*
627+
How OpenMP locks works with NUM_PARALLEL
628+
1) parallel_section_left = Number of available concurrent executions of OpenBLAS - Number of currently executing OpenBLAS executions
629+
2) level3_lock is acting like a master lock or barrier which stops OpenBLAS calls when all the parallel_section are currently busy executing other OpenBLAS calls
630+
3) critical_section_lock is used for updating variables shared between threads executing OpenBLAS calls concurrently and for unlocking of master lock whenever required
631+
4) Unlock master lock only when we have not already exhausted all the parallel_sections and allow another thread with a OpenBLAS call to enter
632+
*/
633+
if(parallel_section_left != 0)
634+
omp_unset_lock(&level3_lock);
635+
636+
omp_unset_lock(&critical_section_lock);
637+
638+
#elif defined(OS_WINDOWS)
639+
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
605640
#else
606-
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
607-
#endif
641+
pthread_mutex_lock(&level3_lock);
608642
#endif
609643

610644
#ifdef USE_ALLOC_HEAP
@@ -732,12 +766,24 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
732766
free(job);
733767
#endif
734768

735-
#ifndef USE_OPENMP
736-
#ifndef OS_WINDOWS
737-
pthread_mutex_unlock(&level3_lock);
738-
#else
769+
#ifdef USE_OPENMP
770+
omp_set_lock(&critical_section_lock);
771+
parallel_section_left++;
772+
773+
/*
774+
Unlock master lock only when all the parallel_sections are already exhausted and one of the thread has completed its OpenBLAS call
775+
otherwise just increment the parallel_section_left
776+
The master lock is only locked when we have exhausted all the parallel_sections, So only unlock it then and otherwise just increment the count
777+
*/
778+
if(parallel_section_left == 1)
779+
omp_unset_lock(&level3_lock);
780+
781+
omp_unset_lock(&critical_section_lock);
782+
783+
#elif defined(OS_WINDOWS)
739784
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
740-
#endif
785+
#else
786+
pthread_mutex_unlock(&level3_lock);
741787
#endif
742788

743789
return 0;

driver/others/blas_server_omp.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
407407
}
408408
#endif
409409

410-
while(true) {
411410
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
412411
#ifdef HAVE_C11
413412
_Bool inuse = false;
@@ -420,9 +419,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
420419
break;
421420
}
422421
}
423-
if(i != MAX_PARALLEL_NUMBER)
424-
break;
425-
}
426422

427423
if (openblas_omp_adaptive_env() != 0) {
428424
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)

0 commit comments

Comments
 (0)