@@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
198198}
199199#endif
200200
201+ #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV2 )
/*
 * Choose a GEMM thread count from a fixed table of problem-size thresholds.
 * get_gemm_optimal_nthreads() dispatches here for the "neoversev2" core.
 *
 *   MNK   problem-size metric supplied by the caller (presumably the
 *         product M*N*K -- confirm against the call site).
 *   ncpu  upper bound on the returned thread count.
 *
 * Returns 1 when MNK is below 125000. Otherwise returns MIN(ncpu, cap) for
 * the first table row whose limit is strictly greater than MNK, or ncpu
 * unchanged when MNK reaches or exceeds the last limit (1073741824 = 2^30).
 * Every threshold is a perfect cube (50^3, 103^3, 138^3, ..., 1024^3).
 */
static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
  /* Rows are ordered by ascending limit; the first matching row wins. */
  static const struct {
    double limit; /* exclusive upper bound on MNK for this row */
    int cap;      /* largest thread count allowed for this row */
  } steps[] = {
    {    1092727.0,  6 },
    {    2628072.0,  8 },
    {    8000000.0, 12 },
    {   20346417.0, 16 },
    {   57066625.0, 24 },
    {   91125000.0, 28 },
    {  238328000.0, 40 },
    {  454756609.0, 48 },
    {  857375000.0, 56 },
    { 1073741824.0, 64 },
  };
  const int nsteps = (int)(sizeof steps / sizeof steps[0]);
  int row;

  /* Smallest problems always run single-threaded, independent of ncpu. */
  if (MNK < 125000.0) {
    return 1;
  }

  for (row = 0; row < nsteps; row++) {
    if (MNK < steps[row].limit) {
      return MIN(ncpu, steps[row].cap);
    }
  }

  /* At or beyond the last limit (or MNK is NaN): use every available CPU. */
  return ncpu;
}
217+ #endif
218+
201219static inline int get_gemm_optimal_nthreads (double MNK ) {
202220 int ncpu = num_cpu_avail (3 );
203221#if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
204222 return get_gemm_optimal_nthreads_neoversev1 (MNK , ncpu );
223+ #elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
224+ return get_gemm_optimal_nthreads_neoversev2 (MNK , ncpu );
205225#elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
206226 if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
207227 return get_gemm_optimal_nthreads_neoversev1 (MNK , ncpu );
208228 }
229+ if (strcmp (gotoblas_corename (), "neoversev2" ) == 0 ) {
230+ return get_gemm_optimal_nthreads_neoversev2 (MNK , ncpu );
231+ }
209232#endif
210233 if ( MNK <= (SMP_THRESHOLD_MIN * (double ) GEMM_MULTITHREAD_THRESHOLD ) ) {
211234 return 1 ;
0 commit comments