Skip to content

Commit baf03a0

Browse files
authored
Merge pull request #3252 from martin-frbg/more_shortcuts
Further shortcuts for (small) cases that do not need buffer allocation
2 parents 7aab5e8 + f84197c commit baf03a0

7 files changed

Lines changed: 100 additions & 0 deletions

File tree

interface/ger.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,11 @@ void CNAME(enum CBLAS_ORDER order,
164164
if (m == 0 || n == 0) return;
165165
if (alpha == 0.) return;
166166

167+
if (incx == 1 && incy == 1 && 1L*m*n <= 2048 *GEMM_MULTITHREAD_THRESHOLD) {
168+
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
169+
return;
170+
}
171+
167172
IDEBUG_START;
168173

169174
FUNCTION_PROFILE_START();

interface/spr.c

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,26 @@ void CNAME(enum CBLAS_ORDER order,
167167

168168
FUNCTION_PROFILE_START();
169169

170+
if (incx == 1 && n <100) {
171+
blasint i;
172+
if (uplo==0) {
173+
for (i = 0; i < n; i++){
174+
if (x[i] != ZERO) {
175+
AXPYU_K(i + 1, 0, 0, alpha * x[i], x, 1, a, 1, NULL, 0);
176+
}
177+
a += i + 1;
178+
}
179+
} else {
180+
for (i = 0; i < n; i++){
181+
if (x[i] != ZERO) {
182+
AXPYU_K(n - i, 0, 0, alpha * x[i], x + i, 1, a, 1, NULL, 0);
183+
}
184+
a += n - i;
185+
}
186+
}
187+
return;
188+
}
189+
170190
if (incx < 0 ) x -= (n - 1) * incx;
171191

172192
buffer = (FLOAT *)blas_memory_alloc(1);

interface/spr2.c

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,24 @@ void CNAME(enum CBLAS_ORDER order,
168168

169169
if (alpha == ZERO) return;
170170

171+
if (incx == 1 && incy == 1 && n < 50) {
172+
blasint i;
173+
if (!uplo) {
174+
for (i = 0; i < n; i++){
175+
AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0);
176+
AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0);
177+
a += i + 1;
178+
}
179+
} else {
180+
for (i = 0; i < n; i++){
181+
AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0);
182+
AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0);
183+
a += n - i;
184+
}
185+
}
186+
return;
187+
}
188+
171189
IDEBUG_START;
172190

173191
FUNCTION_PROFILE_START();

interface/syr2.c

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,25 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
170170

171171
IDEBUG_START;
172172

173+
if (incx == 1 && incy == 1 && n < 100) {
174+
blasint i;
175+
if (!uplo) {
176+
for (i = 0; i < n; i++){
177+
AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0);
178+
AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0);
179+
a += lda;
180+
}
181+
} else {
182+
for (i = 0; i < n; i++){
183+
AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0);
184+
AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0);
185+
a += 1 + lda;
186+
}
187+
}
188+
return;
189+
}
190+
191+
173192
FUNCTION_PROFILE_START();
174193

175194
if (incx < 0 ) x -= (n - 1) * incx;

interface/trsv.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
188188

189189
if (n == 0) return;
190190

191+
if (incx == 1 && trans == 0 && n < 50) {
192+
buffer = NULL;
193+
(trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer);
194+
return;
195+
}
196+
191197
IDEBUG_START;
192198

193199
FUNCTION_PROFILE_START();

interface/zsyr.c

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,32 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
172172

173173
if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
174174

175+
if (incx == 1 && n < 50) {
176+
blasint i;
177+
if (!uplo) {
178+
for (i = 0; i < n; i++){
179+
if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) {
180+
AXPYU_K(i + 1, 0, 0,
181+
alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1],
182+
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
183+
x, 1, a, 1, NULL, 0);
184+
}
185+
a += lda;
186+
}
187+
} else {
188+
for (i = 0; i < n; i++){
189+
if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) {
190+
AXPYU_K(n - i, 0, 0,
191+
alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1],
192+
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
193+
x + i * 2, 1, a, 1, NULL, 0);
194+
}
195+
a += 2 + lda;
196+
}
197+
}
198+
return;
199+
}
200+
175201
IDEBUG_START;
176202

177203
FUNCTION_PROFILE_START();

interface/ztrsv.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
199199

200200
if (n == 0) return;
201201

202+
if (incx == 1 && trans == 0 && n < 50) {
203+
buffer = NULL;
204+
(trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer);
205+
return;
206+
}
207+
202208
IDEBUG_START;
203209

204210
FUNCTION_PROFILE_START();

0 commit comments

Comments
 (0)