Skip to content

Commit b6aff47

Browse files
authored
Merge pull request #5619 from lujiaweics/fix/serialize_parallelized_syrk_function_callers
Serialize accesses to parallelized syrk functions from multiple calle…
2 parents 413e609 + 1f3b81e commit b6aff47

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

driver/level3/level3_syrk_threaded.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,33 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
504504

505505
int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){
506506

507+
#ifdef USE_OPENMP
508+
static omp_lock_t level3_lock, critical_section_lock;
509+
static volatile BLASULONG init_lock = 0, omp_lock_initialized = 0,
510+
parallel_section_left = MAX_PARALLEL_NUMBER;
511+
512+
// Lock initialization. TODO: maybe this part can be moved to blas_init() in blas_server_omp.c
513+
while(omp_lock_initialized == 0)
514+
{
515+
blas_lock(&init_lock);
516+
{
517+
if(omp_lock_initialized == 0)
518+
{
519+
omp_init_lock(&level3_lock);
520+
omp_init_lock(&critical_section_lock);
521+
omp_lock_initialized = 1;
522+
WMB;
523+
}
524+
blas_unlock(&init_lock);
525+
}
526+
}
527+
#elif defined(OS_WINDOWS)
528+
CRITICAL_SECTION level3_lock;
529+
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
530+
#else
531+
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
532+
#endif
533+
507534
blas_arg_t newarg;
508535

509536
#ifndef USE_ALLOC_HEAP
@@ -560,6 +587,30 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
560587
#endif
561588
#endif
562589

590+
#ifdef USE_OPENMP
591+
omp_set_lock(&level3_lock);
592+
omp_set_lock(&critical_section_lock);
593+
594+
parallel_section_left--;
595+
596+
/*
597+
How the OpenMP locks work with NUM_PARALLEL
598+
1) parallel_section_left = number of OpenBLAS executions allowed to run concurrently - number of OpenBLAS executions currently running
599+
2) level3_lock acts as a master lock (barrier) that blocks new OpenBLAS calls while all the parallel sections are busy executing other OpenBLAS calls
600+
3) critical_section_lock protects updates to the variables shared between threads executing OpenBLAS calls concurrently, and guards the unlocking of the master lock whenever that is required
601+
4) Unlock the master lock only when we have not already exhausted all the parallel sections, allowing another thread with an OpenBLAS call to enter
602+
*/
603+
if(parallel_section_left != 0)
604+
omp_unset_lock(&level3_lock);
605+
606+
omp_unset_lock(&critical_section_lock);
607+
608+
#elif defined(OS_WINDOWS)
609+
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
610+
#else
611+
pthread_mutex_lock(&level3_lock);
612+
#endif
613+
563614
newarg.m = args -> m;
564615
newarg.n = args -> n;
565616
newarg.k = args -> k;
@@ -706,5 +757,25 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
706757
free(job);
707758
#endif
708759

760+
#ifdef USE_OPENMP
761+
omp_set_lock(&critical_section_lock);
762+
parallel_section_left++;
763+
764+
/*
765+
Unlock the master lock only when all the parallel sections were already exhausted and one of the threads has completed its OpenBLAS call;
766+
otherwise just increment the parallel_section_left
767+
The master lock is held only when all the parallel sections have been exhausted, so unlock it only in that case; otherwise just increment the count.
768+
*/
769+
if(parallel_section_left == 1)
770+
omp_unset_lock(&level3_lock);
771+
772+
omp_unset_lock(&critical_section_lock);
773+
774+
#elif defined(OS_WINDOWS)
775+
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
776+
#else
777+
pthread_mutex_unlock(&level3_lock);
778+
#endif
779+
709780
return 0;
710781
}

0 commit comments

Comments
 (0)