Skip to content

Commit bc19101

Browse files
committed
Using OpenMP locks with NUM_PARALLEL
1 parent 394a9fb commit bc19101

2 files changed

Lines changed: 62 additions & 20 deletions

File tree

driver/level3/level3_thread.c

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -548,13 +548,31 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
548548
*range_n, IFLOAT *sa, IFLOAT *sb,
549549
BLASLONG nthreads_m, BLASLONG nthreads_n) {
550550

551-
#ifndef USE_OPENMP
552-
#ifndef OS_WINDOWS
553-
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
551+
#ifdef USE_OPENMP
552+
static omp_lock_t level3_lock, critical_section_lock;
553+
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
554+
parallel_section_left = MAX_PARALLEL_NUMBER;
555+
556+
// Lock initialization; TODO: maybe this part can be moved to blas_init() in blas_server_omp.c
557+
while(omp_lock_initialized == 0)
558+
{
559+
blas_lock(&init_lock);
560+
{
561+
if(omp_lock_initialized == 0)
562+
{
563+
omp_init_lock(&level3_lock);
564+
omp_init_lock(&critical_section_lock);
565+
omp_lock_initialized = 1;
566+
WMB;
567+
}
568+
blas_unlock(&init_lock);
569+
}
570+
}
571+
#elif defined(OS_WINDOWS)
572+
CRITICAL_SECTION level3_lock;
573+
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
554574
#else
555-
CRITICAL_SECTION level3_lock;
556-
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
557-
#endif
575+
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
558576
#endif
559577

560578
blas_arg_t newarg;
@@ -597,12 +615,28 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
597615
#endif
598616
#endif
599617

600-
#ifndef USE_OPENMP
601-
#ifndef OS_WINDOWS
602-
pthread_mutex_lock(&level3_lock);
618+
#ifdef USE_OPENMP
619+
omp_set_lock(&level3_lock);
620+
omp_set_lock(&critical_section_lock);
621+
622+
parallel_section_left--;
623+
624+
/*
625+
How OpenMP locks work with NUM_PARALLEL
626+
1) parallel_section_left = Number of available concurrent executions of OpenBLAS - Number of currently executing OpenBLAS executions
627+
2) level3_lock acts as a master lock or barrier which stops OpenBLAS calls when all the parallel_sections are currently busy executing other OpenBLAS calls
628+
3) critical_section_lock is used for updating variables shared between threads executing OpenBLAS calls concurrently and for unlocking the master lock whenever required
629+
4) Unlock the master lock only when we have not already exhausted all the parallel_sections, allowing another thread with an OpenBLAS call to enter
630+
*/
631+
if(parallel_section_left != 0)
632+
omp_unset_lock(&level3_lock);
633+
634+
omp_unset_lock(&critical_section_lock);
635+
636+
#elif defined(OS_WINDOWS)
637+
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
603638
#else
604-
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
605-
#endif
639+
pthread_mutex_lock(&level3_lock);
606640
#endif
607641

608642
#ifdef USE_ALLOC_HEAP
@@ -730,12 +764,24 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
730764
free(job);
731765
#endif
732766

733-
#ifndef USE_OPENMP
734-
#ifndef OS_WINDOWS
735-
pthread_mutex_unlock(&level3_lock);
736-
#else
767+
#ifdef USE_OPENMP
768+
omp_set_lock(&critical_section_lock);
769+
parallel_section_left++;
770+
771+
/*
772+
Unlock the master lock only when all the parallel_sections are already exhausted and one of the threads has completed its OpenBLAS call
773+
otherwise just increment parallel_section_left
774+
The master lock is only locked when we have exhausted all the parallel_sections, so only unlock it then; otherwise just increment the count
775+
*/
776+
if(parallel_section_left == 1)
777+
omp_unset_lock(&level3_lock);
778+
779+
omp_unset_lock(&critical_section_lock);
780+
781+
#elif defined(OS_WINDOWS)
737782
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
738-
#endif
783+
#else
784+
pthread_mutex_unlock(&level3_lock);
739785
#endif
740786

741787
return 0;

driver/others/blas_server_omp.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
396396
}
397397
#endif
398398

399-
while(true) {
400399
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
401400
#ifdef HAVE_C11
402401
_Bool inuse = false;
@@ -409,9 +408,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
409408
break;
410409
}
411410
}
412-
if(i != MAX_PARALLEL_NUMBER)
413-
break;
414-
}
415411

416412
if (openblas_omp_adaptive_env() != 0) {
417413
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)

0 commit comments

Comments
 (0)