5151/* This is a lazy thread implementation for Win32. */
5252
5353/* Thread server common information */
54- //typedef struct{
55- // CRITICAL_SECTION lock;
56- // HANDLE filled;
57- // HANDLE killed;
58- //
59- // blas_queue_t *queue; /* Parameter Pointer */
60- // int shutdown; /* server shutdown flag */
61- //
62- //} blas_pool_t;
6354
6455static blas_queue_t * work_queue = NULL ; /* head of the pending work list; worker threads read it and detach the first entry (following its ->next link) */
6556static HANDLE kickoff_event = NULL ; /* event created in blas_thread_init; worker threads block on it before looking for work or checking whether they should exit */
@@ -71,11 +62,19 @@ int blas_server_avail = 0;
7162/* Local Variables */
7263static BLASULONG server_lock = 0 ; /* taken with LOCK_COMMAND / UNLOCK_COMMAND while the set of worker threads is being resized */
7364
74- //static blas_pool_t pool;
7565static HANDLE blas_threads [MAX_CPU_NUMBER ]; /* handles returned by CreateThread, indexed by worker number */
7666static DWORD blas_threads_id [MAX_CPU_NUMBER ]; /* thread ids filled in by CreateThread, same indexing as blas_threads */
67+ static volatile int thread_target ; // requested thread count; a worker whose index is greater than thread_target - 2 exits after its next wake-up. volatile for cross-thread reads. NOTE(review): volatile by itself provides no atomicity or ordering guarantee -- confirm this is sufficient here.
7768
78-
/*
 * WIN_CAS(dest, exch, comp): pointer-sized atomic compare-and-swap.
 *
 * Atomically stores `exch` into `*dest` when `*dest` equals `comp`, and
 * evaluates to the value `*dest` held before the operation; the swap took
 * place iff that result equals `comp`.
 *
 * The argument order follows the Win32 Interlocked* convention
 * (destination, new value, comparand).  The GCC builtin used for old
 * compilers takes the comparand before the new value, which is why the
 * last two arguments are swapped in that branch.
 *
 * The Interlocked* branches cast their operands explicitly: the call site
 * passes INT_PTR operands, and INT_PTR is not the same C type as LONG on
 * 32-bit targets, so forwarding the pointer uncast is an incompatible
 * pointer conversion.  Every macro argument is parenthesized so that
 * arbitrary expressions can be passed safely.
 */
#if defined(__GNUC__) && (__GNUC__ < 6)
#define WIN_CAS(dest, exch, comp) \
  __sync_val_compare_and_swap((dest), (comp), (exch))
#elif defined(_WIN64)
#define WIN_CAS(dest, exch, comp) \
  InterlockedCompareExchange64((LONG64 volatile *)(dest), (LONG64)(exch), (LONG64)(comp))
#else
#define WIN_CAS(dest, exch, comp) \
  InterlockedCompareExchange((LONG volatile *)(dest), (LONG)(exch), (LONG)(comp))
#endif
7978
8079static void legacy_exec (void * func , int mode , blas_arg_t * args , void * sb ){
8180
@@ -206,14 +205,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
206205static DWORD WINAPI blas_thread_server (void * arg ){
207206
208207 /* Thread identifier */
209- #ifdef SMP_DEBUG
210208 BLASLONG cpu = (BLASLONG )arg ;
211- #endif
212209
213210 void * buffer , * sa , * sb ;
214211 blas_queue_t * queue ;
215- DWORD action ;
216- //HANDLE handles[] = {pool.filled, pool.killed};
217212
218213 /* Each server needs each buffer */
219214 buffer = blas_memory_alloc (2 );
@@ -232,6 +227,12 @@ static DWORD WINAPI blas_thread_server(void *arg){
232227 // event raised when work is added to the queue
233228 WaitForSingleObject (kickoff_event , INFINITE );
234229
230+ if (cpu > thread_target - 2 )
231+ {
232+ //printf("thread [%d] exiting.\n", cpu);
233+ break ; // excess thread, so worker thread exits
234+ }
235+
235236#ifdef SMP_DEBUG
236237 fprintf (STDERR , "Server[%2ld] Got it.\n" , cpu );
237238#endif
@@ -245,17 +246,17 @@ static DWORD WINAPI blas_thread_server(void *arg){
245246
246247 LeaveCriticalSection (& queue_lock );
247248#else
248- volatile work_queue_t * queue_next ;
249+ volatile blas_queue_t * queue_next ;
249250
250251 INT_PTR prev_value ;
251252 do {
252- queue = (volatile work_queue_t * )work_queue ;
253+ queue = (volatile blas_queue_t * )work_queue ;
253254 if (!queue )
254255 break ;
255256
256- queue_next = (volatile work_queue_t * )queue -> next ;
257+ queue_next = (volatile blas_queue_t * )queue -> next ;
257258 prev_value = WIN_CAS ((INT_PTR * )& work_queue , (INT_PTR )queue_next , (INT_PTR )queue );
258- } while (prev_value != work_item );
259+ } while (prev_value != queue );
259260#endif
260261
261262 if (queue ) {
@@ -377,9 +378,13 @@ int blas_thread_init(void){
377378 // create the kickoff Event
378379 kickoff_event = CreateEvent (NULL , TRUE, FALSE, NULL );
379380
381+ thread_target = blas_cpu_number ;
382+
380383 InitializeCriticalSection (& queue_lock );
381384
382385 for (i = 0 ; i < blas_cpu_number - 1 ; i ++ ){
386+ //printf("thread_init: creating thread [%d]\n", i);
387+
383388 blas_threads [i ] = CreateThread (NULL , 0 ,
384389 blas_thread_server , (void * )i ,
385390 0 , & blas_threads_id [i ]);
@@ -564,10 +569,36 @@ void goto_set_num_threads(int num_threads)
564569
565570 if (num_threads > MAX_CPU_NUMBER ) num_threads = MAX_CPU_NUMBER ;
566571
572+ if (blas_server_avail && num_threads < blas_num_threads ) {
573+ LOCK_COMMAND (& server_lock );
574+
575+ thread_target = num_threads ;
576+
577+ SetEvent (kickoff_event );
578+
579+ for (i = num_threads - 1 ; i < blas_num_threads - 1 ; i ++ ) {
580+ //printf("set_num_threads: waiting on thread [%d] to quit.\n", i);
581+
582+ WaitForSingleObject (blas_threads [i ], INFINITE );
583+
584+ //printf("set_num_threads: thread [%d] has quit.\n", i);
585+
586+ CloseHandle (blas_threads [i ]);
587+ }
588+
589+ blas_num_threads = num_threads ;
590+
591+ ResetEvent (kickoff_event );
592+
593+ UNLOCK_COMMAND (& server_lock );
594+ }
595+
567596 if (num_threads > blas_num_threads ) {
568597
569598 LOCK_COMMAND (& server_lock );
570599
600+ thread_target = num_threads ;
601+
571602 //increased_threads = 1;
572603 if (!blas_server_avail ){
573604 // create the kickoff Event
@@ -579,6 +610,7 @@ void goto_set_num_threads(int num_threads)
579610 }
580611
581612 for (i = (blas_num_threads > 0 ) ? blas_num_threads - 1 : 0 ; i < num_threads - 1 ; i ++ ){
613+ //printf("set_num_threads: creating thread [%d]\n", i);
582614
583615 blas_threads [i ] = CreateThread (NULL , 0 ,
584616 blas_thread_server , (void * )i ,
0 commit comments