Skip to content

Commit dc0338a

Browse files
authored
Merge pull request #4560 from martin-frbg/issue4551-3
Add support for negative increments to the ?NRM2 kernels for RISC-V RVV targets
2 parents 855bbdd + cf80bd8 commit dc0338a

4 files changed

Lines changed: 9 additions & 9 deletions

File tree

kernel/riscv64/nrm2_rvv.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
101101
{
102102
BLASLONG i=0;
103103

104-
if (n <= 0 || inc_x <= 0) return(0.0);
104+
if (n <= 0 || inc_x == 0) return(0.0);
105105
if(n == 1) return (ABS(x[0]));
106106

107107
unsigned int gvl = 0;
@@ -119,7 +119,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
119119
unsigned int stride_x = inc_x * sizeof(FLOAT);
120120
int idx = 0;
121121

122-
if( n >= gvl ) // don't pay overheads if we're not doing useful work
122+
if( n >= gvl && inc_x > 0 ) // don't pay overheads if we're not doing useful work
123123
{
124124
for(i=0; i<n/gvl; i++){
125125
v0 = VLSEV_FLOAT( &x[idx], stride_x, gvl );
@@ -190,7 +190,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
190190
//finish any tail using scalar ops
191191
i*=gvl*inc_x;
192192
n*=inc_x;
193-
while(i < n){
193+
while(abs(i) < abs(n)){
194194
if ( x[i] != 0.0 ){
195195
FLOAT absxi = ABS( x[i] );
196196
if ( scale < absxi ){

kernel/riscv64/nrm2_vector.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
104104
{
105105
BLASLONG i=0;
106106

107-
if (n <= 0 || inc_x <= 0) return(0.0);
107+
if (n <= 0 || inc_x == 0) return(0.0);
108108
if(n == 1) return (ABS(x[0]));
109109

110110
unsigned int gvl = 0;
@@ -122,7 +122,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
122122
unsigned int stride_x = inc_x * sizeof(FLOAT);
123123
int idx = 0;
124124

125-
if( n >= gvl ) // don't pay overheads if we're not doing useful work
125+
if( n >= gvl && inc_x > 0) // don't pay overheads if we're not doing useful work
126126
{
127127
for(i=0; i<n/gvl; i++){
128128
v0 = VLSEV_FLOAT( &x[idx], stride_x, gvl );
@@ -193,7 +193,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
193193
//finish any tail using scalar ops
194194
i*=gvl*inc_x;
195195
n*=inc_x;
196-
while(i < n){
196+
while(abs(i)< abs(n)){
197197
if ( x[i] != 0.0 ){
198198
FLOAT absxi = ABS( x[i] );
199199
if ( scale < absxi ){

kernel/riscv64/znrm2_rvv.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
6969
{
7070
BLASLONG i=0, j=0;
7171

72-
if (n <= 0 || inc_x <= 0) return(0.0);
72+
if (n <= 0 || inc_x == 0) return(0.0);
7373

7474
FLOAT_V_T vr, v0, v_zero;
7575
unsigned int gvl = 0;

kernel/riscv64/znrm2_vector.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
9696
{
9797
BLASLONG i=0;
9898

99-
if (n <= 0 || inc_x <= 0) return(0.0);
99+
if (n <= 0 || inc_x == 0) return(0.0);
100100

101101
FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
102102
unsigned int gvl = 0;
@@ -176,7 +176,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
176176
}
177177

178178
i += inc_x*2;
179-
}while(i<n);
179+
}while(abs(i)<abs(n));
180180
}
181181

182182
return(scale * sqrt(ssq));

0 commit comments

Comments
 (0)