Skip to content

Commit 7d1becc

Browse files
authored
Allocate an auxiliary struct when running out of preconfigured threads
1 parent 6bb1805 commit 7d1becc

1 file changed

Lines changed: 142 additions & 3 deletions

File tree

driver/others/memory.c

Lines changed: 142 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2636,8 +2636,25 @@ static volatile struct {
26362636

26372637
} memory[NUM_BUFFERS];
26382638

2639-
static int memory_initialized = 0;
2639+
static volatile struct newmemstruct
2640+
{
2641+
BLASULONG lock;
2642+
void *addr;
2643+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
2644+
int pos;
2645+
#endif
2646+
int used;
2647+
#ifndef __64BIT__
2648+
char dummy[48];
2649+
#else
2650+
char dummy[40];
2651+
#endif
26402652

2653+
};
2654+
static volatile struct newmemstruct *newmemory;
2655+
2656+
static int memory_initialized = 0;
2657+
static int memory_overflowed = 0;
26412658
/* Memory allocation routine */
26422659
/* procpos ... indicates where it comes from */
26432660
/* 0 : Level 3 functions */
@@ -2779,6 +2796,29 @@ void *blas_memory_alloc(int procpos){
27792796
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
27802797
UNLOCK_COMMAND(&alloc_lock);
27812798
#endif
2799+
if (memory_overflowed) {
2800+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2801+
LOCK_COMMAND(&alloc_lock);
2802+
#endif
2803+
do {
2804+
RMB;
2805+
#if defined(USE_OPENMP)
2806+
if (!newmemory[position-NUM_BUFFERS].used) {
2807+
blas_lock(&newmemory[position-NUM_BUFFERS].lock);
2808+
#endif
2809+
if (!newmemory[position-NUM_BUFFERS].used) goto allocation2;
2810+
2811+
#if defined(USE_OPENMP)
2812+
blas_unlock(&newmemory[position-NUM_BUFFERS].lock);
2813+
}
2814+
#endif
2815+
position ++;
2816+
2817+
} while (position < 512+NUM_BUFFERS);
2818+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2819+
UNLOCK_COMMAND(&alloc_lock);
2820+
#endif
2821+
}
27822822
goto error;
27832823

27842824
allocation :
@@ -2883,6 +2923,90 @@ void *blas_memory_alloc(int procpos){
28832923
return (void *)memory[position].addr;
28842924

28852925
error:
2926+
if (memory_overflowed) goto terminate;
2927+
printf("num_buffers exceeded, adding auxiliary array\n");
2928+
memory_overflowed=1;
2929+
newmemory= (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct));
2930+
for (int i=0;i<512;i++) {
2931+
newmemory[i].addr = (void *)0;
2932+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
2933+
newmemory[i].pos = -1;
2934+
#endif
2935+
newmemory[i].used = 0;
2936+
newmemory[i].lock = 0;
2937+
}
2938+
newmemory[position-NUM_BUFFERS].used = 1;
2939+
2940+
allocation2:
2941+
newmemory[position-NUM_BUFFERS].used = 1;
2942+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2943+
UNLOCK_COMMAND(&alloc_lock);
2944+
#else
2945+
blas_unlock(&newmemory[position-NUM_BUFFERS].lock);
2946+
#endif
2947+
do {
2948+
#ifdef DEBUG
2949+
printf("Allocation Start : %lx\n", base_address);
2950+
#endif
2951+
2952+
map_address = (void *)-1;
2953+
2954+
func = &memoryalloc[0];
2955+
2956+
while ((func != NULL) && (map_address == (void *) -1)) {
2957+
2958+
map_address = (*func)((void *)base_address);
2959+
2960+
#ifdef ALLOC_DEVICEDRIVER
2961+
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
2962+
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
2963+
}
2964+
#endif
2965+
2966+
#ifdef ALLOC_HUGETLBFILE
2967+
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
2968+
#ifndef OS_WINDOWS
2969+
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
2970+
#endif
2971+
}
2972+
#endif
2973+
2974+
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
2975+
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
2976+
#endif
2977+
2978+
func ++;
2979+
}
2980+
2981+
#ifdef DEBUG
2982+
printf(" Success -> %08lx\n", map_address);
2983+
#endif
2984+
if (((BLASLONG) map_address) == -1) base_address = 0UL;
2985+
2986+
if (base_address) base_address += BUFFER_SIZE + FIXED_PAGESIZE;
2987+
2988+
} while ((BLASLONG)map_address == -1);
2989+
2990+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2991+
LOCK_COMMAND(&alloc_lock);
2992+
#endif
2993+
newmemory[position-NUM_BUFFERS].addr = map_address;
2994+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2995+
UNLOCK_COMMAND(&alloc_lock);
2996+
#endif
2997+
2998+
//#ifdef DEBUG
2999+
printf(" Mapping Succeeded. %p(%d)\n", (void *)newmemory[position-NUM_BUFFERS].addr, position);
3000+
//#endif
3001+
3002+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
3003+
3004+
if (newmemory[position-NUM_BUFFERS].pos == -1) newmemory[position-NUM_BUFFERS].pos = mypos;
3005+
3006+
#endif
3007+
return (void *)newmemory[position-NUM_BUFFERS].addr;
3008+
3009+
terminate:
28863010
printf("OpenBLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
28873011
printf("This library was built to support a maximum of %d threads - either rebuild OpenBLAS\n", NUM_BUFFERS);
28883012
printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n");
@@ -2907,13 +3031,28 @@ void blas_memory_free(void *free_area){
29073031
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
29083032
position++;
29093033

2910-
if (position >= NUM_BUFFERS) goto error;
3034+
if (position >= NUM_BUFFERS && !memory_overflowed) goto error;
29113035

29123036
#ifdef DEBUG
29133037
if (memory[position].addr != free_area) goto error;
29143038
printf(" Position : %d\n", position);
29153039
#endif
3040+
if (memory_overflowed) {
3041+
while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area))
3042+
position++;
3043+
// arm: ensure all writes are finished before other thread takes this memory
3044+
WMB;
29163045

3046+
newmemory[position].used = 0;
3047+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
3048+
UNLOCK_COMMAND(&alloc_lock);
3049+
#endif
3050+
3051+
//#ifdef DEBUG
3052+
printf("Unmap from overflow area succeeded.\n\n");
3053+
//#endif
3054+
return;
3055+
} else {
29173056
// arm: ensure all writes are finished before other thread takes this memory
29183057
WMB;
29193058

@@ -2927,7 +3066,7 @@ void blas_memory_free(void *free_area){
29273066
#endif
29283067

29293068
return;
2930-
3069+
}
29313070
error:
29323071
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
29333072

0 commit comments

Comments
 (0)