@@ -2636,8 +2636,25 @@ static volatile struct {
26362636
26372637} memory [NUM_BUFFERS ];
26382638
2639- static int memory_initialized = 0 ;
2639+ static volatile struct newmemstruct /* One slot of the auxiliary (overflow) buffer table used once the fixed memory[] table is exhausted. NOTE(review): 'static volatile' here has no declarator to apply to (the declaration ends in '} ;') - confirm this is intended. */
2640+ {
2641+ BLASULONG lock ; /* Per-slot lock word handed to blas_lock()/blas_unlock(); reset to 0 when the table is created. */
2642+ void * addr ; /* Address returned by the allocator chain for this slot; compared against free_area when a buffer is released. */
2643+ #if defined(WHEREAMI ) && !defined(USE_OPENMP )
2644+ int pos ; /* Initialised to -1 and set to mypos the first time the slot is handed out. */
2645+ #endif
2646+ int used ; /* Non-zero while the slot is handed out; cleared again on release. */
2647+ #ifndef __64BIT__
2648+ char dummy [48 ]; /* NOTE(review): presumably padding to a fixed slot size on non-64-bit builds - confirm. */
2649+ #else
2650+ char dummy [40 ]; /* NOTE(review): presumably padding to a fixed slot size on 64-bit builds - confirm. */
2651+ #endif
26402652
2653+ } ;
2654+ static volatile struct newmemstruct * newmemory ; /* Overflow table; malloc'ed with 512 entries the first time the allocator reaches its error path. */
2655+
2656+ static int memory_initialized = 0 ;
2657+ static int memory_overflowed = 0 ; /* Set to 1 once the overflow table has been created; checked by both the allocation and release paths. */
26412658/* Memory allocation routine */
26422659/* procpos ... indicates where it comes from */
26432660/* 0 : Level 3 functions */
@@ -2779,6 +2796,29 @@ void *blas_memory_alloc(int procpos){
27792796#if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
27802797 UNLOCK_COMMAND (& alloc_lock );
27812798#endif
2799+ if (memory_overflowed ) {
2800+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
2801+ LOCK_COMMAND (& alloc_lock );
2802+ #endif
2803+ do {
2804+ RMB ;
2805+ #if defined(USE_OPENMP )
2806+ if (!newmemory [position - NUM_BUFFERS ].used ) {
2807+ blas_lock (& newmemory [position - NUM_BUFFERS ].lock );
2808+ #endif
2809+ if (!newmemory [position - NUM_BUFFERS ].used ) goto allocation2 ;
2810+
2811+ #if defined(USE_OPENMP )
2812+ blas_unlock (& newmemory [position - NUM_BUFFERS ].lock );
2813+ }
2814+ #endif
2815+ position ++ ;
2816+
2817+ } while (position < 512 + NUM_BUFFERS );
2818+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
2819+ UNLOCK_COMMAND (& alloc_lock );
2820+ #endif
2821+ }
27822822 goto error ;
27832823
27842824 allocation :
@@ -2883,6 +2923,90 @@ void *blas_memory_alloc(int procpos){
28832923 return (void * )memory [position ].addr ;
28842924
28852925 error :
2926+ if (memory_overflowed ) goto terminate ;
2927+ printf ("num_buffers exceeded, adding auxiliary array\n" );
2928+ memory_overflowed = 1 ;
2929+ newmemory = (struct newmemstruct * ) malloc (512 * sizeof (struct newmemstruct ));
2930+ for (int i = 0 ;i < 512 ;i ++ ) {
2931+ newmemory [i ].addr = (void * )0 ;
2932+ #if defined(WHEREAMI ) && !defined(USE_OPENMP )
2933+ newmemory [i ].pos = -1 ;
2934+ #endif
2935+ newmemory [i ].used = 0 ;
2936+ newmemory [i ].lock = 0 ;
2937+ }
2938+ newmemory [position - NUM_BUFFERS ].used = 1 ;
2939+
2940+ allocation2 :
2941+ newmemory [position - NUM_BUFFERS ].used = 1 ;
2942+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
2943+ UNLOCK_COMMAND (& alloc_lock );
2944+ #else
2945+ blas_unlock (& newmemory [position - NUM_BUFFERS ].lock );
2946+ #endif
2947+ do {
2948+ #ifdef DEBUG
2949+ printf ("Allocation Start : %lx\n" , base_address );
2950+ #endif
2951+
2952+ map_address = (void * )-1 ;
2953+
2954+ func = & memoryalloc [0 ];
2955+
2956+ while ((func != NULL ) && (map_address == (void * ) -1 )) {
2957+
2958+ map_address = (* func )((void * )base_address );
2959+
2960+ #ifdef ALLOC_DEVICEDRIVER
2961+ if ((* func == alloc_devicedirver ) && (map_address == (void * )-1 )) {
2962+ fprintf (stderr , "OpenBLAS Warning ... Physically contiguous allocation was failed.\n" );
2963+ }
2964+ #endif
2965+
2966+ #ifdef ALLOC_HUGETLBFILE
2967+ if ((* func == alloc_hugetlbfile ) && (map_address == (void * )-1 )) {
2968+ #ifndef OS_WINDOWS
2969+ fprintf (stderr , "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n" );
2970+ #endif
2971+ }
2972+ #endif
2973+
2974+ #if (defined ALLOC_SHM ) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS )
2975+ if ((* func == alloc_hugetlb ) && (map_address != (void * )-1 )) hugetlb_allocated = 1 ;
2976+ #endif
2977+
2978+ func ++ ;
2979+ }
2980+
2981+ #ifdef DEBUG
2982+ printf (" Success -> %08lx\n" , map_address );
2983+ #endif
2984+ if (((BLASLONG ) map_address ) == -1 ) base_address = 0UL ;
2985+
2986+ if (base_address ) base_address += BUFFER_SIZE + FIXED_PAGESIZE ;
2987+
2988+ } while ((BLASLONG )map_address == -1 );
2989+
2990+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
2991+ LOCK_COMMAND (& alloc_lock );
2992+ #endif
2993+ newmemory [position - NUM_BUFFERS ].addr = map_address ;
2994+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
2995+ UNLOCK_COMMAND (& alloc_lock );
2996+ #endif
2997+
2998+ //#ifdef DEBUG
2999+ printf (" Mapping Succeeded. %p(%d)\n" , (void * )newmemory [position - NUM_BUFFERS ].addr , position );
3000+ //#endif
3001+
3002+ #if defined(WHEREAMI ) && !defined(USE_OPENMP )
3003+
3004+ if (newmemory [position - NUM_BUFFERS ].pos == -1 ) newmemory [position - NUM_BUFFERS ].pos = mypos ;
3005+
3006+ #endif
3007+ return (void * )newmemory [position - NUM_BUFFERS ].addr ;
3008+
3009+ terminate :
28863010 printf ("OpenBLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n" );
28873011 printf ("This library was built to support a maximum of %d threads - either rebuild OpenBLAS\n" , NUM_BUFFERS );
28883012 printf ("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n" );
@@ -2907,13 +3031,28 @@ void blas_memory_free(void *free_area){
29073031 while ((position < NUM_BUFFERS ) && (memory [position ].addr != free_area ))
29083032 position ++ ;
29093033
2910- if (position >= NUM_BUFFERS ) goto error ;
3034+ if (position >= NUM_BUFFERS && ! memory_overflowed ) goto error ;
29113035
29123036#ifdef DEBUG
29133037 if (memory [position ].addr != free_area ) goto error ;
29143038 printf (" Position : %d\n" , position );
29153039#endif
3040+ if (memory_overflowed ) {
3041+ while ((position < NUM_BUFFERS + 512 ) && (newmemory [position - NUM_BUFFERS ].addr != free_area ))
3042+ position ++ ;
3043+ // arm: ensure all writes are finished before other thread takes this memory
3044+ WMB ;
29163045
3046+ newmemory [position ].used = 0 ;
3047+ #if (defined(SMP ) || defined(USE_LOCKING )) && !defined(USE_OPENMP )
3048+ UNLOCK_COMMAND (& alloc_lock );
3049+ #endif
3050+
3051+ //#ifdef DEBUG
3052+ printf ("Unmap from overflow area succeeded.\n\n" );
3053+ //#endif
3054+ return ;
3055+ } else {
29173056 // arm: ensure all writes are finished before other thread takes this memory
29183057 WMB ;
29193058
@@ -2927,7 +3066,7 @@ void blas_memory_free(void *free_area){
29273066#endif
29283067
29293068 return ;
2930-
3069+ }
29313070 error :
29323071 printf ("BLAS : Bad memory unallocation! : %4d %p\n" , position , free_area );
29333072
0 commit comments