@@ -18,149 +18,164 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
1818
1919#ifdef DOUBLE
2020 long double safmin = DBL_MIN ;
21+ long double rtmin = sqrt (DBL_MIN /DBL_EPSILON );
2122#else
2223 long double safmin = FLT_MIN ;
24+ long double rtmin = sqrt (FLT_MIN /FLT_EPSILON );
2325#endif
2426
25- #if defined(__i386__ ) || defined(__x86_64__ ) || defined(__ia64__ ) || defined(_M_X64 ) || defined(_M_IX86 )
2627
27- long double da_r = * (DA + 0 );
28- long double da_i = * (DA + 1 );
29- long double db_r = * (DB + 0 );
30- long double db_i = * (DB + 1 );
31- long double r ;
28+ FLOAT da_r = * (DA + 0 );
29+ FLOAT da_i = * (DA + 1 );
30+ FLOAT db_r = * (DB + 0 );
31+ FLOAT db_i = * (DB + 1 );
32+ //long double r;
33+ FLOAT * r , * S1 = (FLOAT * )malloc (2 * sizeof (FLOAT ));
34+ FLOAT * R = (FLOAT * )malloc (2 * sizeof (FLOAT ));
35+ long double d ;
3236
33- long double ada = fabsl (da_r ) + fabsl (da_i );
34- long double adb = sqrt (db_r * db_r + db_i * db_i );
37+ FLOAT ada = da_r * da_r + da_i * da_i ;
38+ FLOAT adb = db_r * db_r + db_i * db_i ;
39+ FLOAT adart = sqrt ( da_r * da_r + da_i * da_i );
40+ FLOAT adbrt = sqrt ( db_r * db_r + db_i * db_i );
3541
3642 PRINT_DEBUG_NAME ;
3743
3844 IDEBUG_START ;
3945
4046 FUNCTION_PROFILE_START ();
4147
42- if (ada == ZERO ) {
43- * C = ZERO ;
44- * (S + 0 ) = ONE ;
48+ if (db_r == ZERO && db_i == ZERO ) {
49+ * C = ONE ;
50+ * (S + 0 ) = ZERO ;
4551 * (S + 1 ) = ZERO ;
46- * (DA + 0 ) = db_r ;
47- * (DA + 1 ) = db_i ;
48- } else {
49- long double alpha_r , alpha_i ;
50- long double safmax = 1. /safmin ;
51- long double sigma ;
52- long double maxab = MAX (ada ,adb );
53- long double scale = MIN (MAX (safmin ,maxab ), safmax );
54-
55-
56- long double aa_r = da_r / scale ;
57- long double aa_i = da_i / scale ;
58- long double bb_r = db_r / scale ;
59- long double bb_i = db_i / scale ;
60-
61- if (ada > adb )
62- sigma = copysign (1. ,da_r );
63- else
64- sigma = copysign (1. ,db_r );
65-
66- r = sigma * scale * sqrt (aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i );
67-
68-
69- alpha_r = da_r / ada ;
70- alpha_i = da_i / ada ;
71-
72- * (C + 0 ) = ada / r ;
73- * (S + 0 ) = (alpha_r * db_r + alpha_i * db_i ) / r ;
74- * (S + 1 ) = (alpha_i * db_r - alpha_r * db_i ) / r ;
75- * (DA + 0 ) = alpha_r * r ;
76- * (DA + 1 ) = alpha_i * r ;
52+ return ;
7753 }
78- #else
79- FLOAT da_r = * (DA + 0 );
80- FLOAT da_i = * (DA + 1 );
81- FLOAT db_r = * (DB + 0 );
82- FLOAT db_i = * (DB + 1 );
83- FLOAT r ;
84-
85- FLOAT ada = fabs (da_r ) + fabs (da_i );
86- FLOAT adb = fabs (db_r ) + fabs (db_i );
87-
88- PRINT_DEBUG_NAME ;
89-
90- IDEBUG_START ;
91-
92- FUNCTION_PROFILE_START ();
93-
94- if (ada == ZERO ) {
95- * C = ZERO ;
96- * (S + 0 ) = ONE ;
97- * (S + 1 ) = ZERO ;
98- * (DA + 0 ) = db_r ;
99- * (DA + 1 ) = db_i ;
100- } else {
101- long double safmax = 1. /safmin ;
102- FLOAT scale , sigma ;
103- FLOAT aa_r , aa_i , bb_r , bb_i ;
104- FLOAT alpha_r , alpha_i ;
105-
106- aa_r = fabs (da_r );
107- aa_i = fabs (da_i );
108-
109- if (aa_i > aa_r ) {
110- aa_r = fabs (da_i );
111- aa_i = fabs (da_r );
112- }
113-
114- if (aa_r == ZERO ) {
115- ada = 0. ;
116- } else {
117- scale = (aa_i / aa_r );
118- ada = aa_r * sqrt (ONE + scale * scale );
119- }
120-
121- bb_r = fabs (db_r );
122- bb_i = fabs (db_i );
123-
124- if (bb_i > bb_r ) {
125- bb_r = fabs (bb_i );
126- bb_i = fabs (bb_r );
127- }
128-
129- if (bb_r == ZERO ) {
130- adb = 0. ;
131- } else {
132- scale = (bb_i / bb_r );
133- adb = bb_r * sqrt (ONE + scale * scale );
134- }
135- FLOAT maxab = MAX (ada ,adb );
136- scale = MIN (MAX (safmin ,maxab ), safmax );
137-
138- aa_r = da_r / scale ;
139- aa_i = da_i / scale ;
140- bb_r = db_r / scale ;
141- bb_i = db_i / scale ;
14254
143- if (ada > adb )
144- sigma = copysign (1. ,da_r );
145- else
146- sigma = copysign (1. ,db_r );
147-
148- r = sigma * scale * sqrt (aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i );
149-
150- alpha_r = da_r / ada ;
151- alpha_i = da_i / ada ;
152-
153- * (C + 0 ) = ada / r ;
154- * (S + 0 ) = (alpha_r * db_r + alpha_i * db_i ) / r ;
155- * (S + 1 ) = (alpha_i * db_r - alpha_r * db_i ) / r ;
156- * (DA + 0 ) = alpha_r * r ;
157- * (DA + 1 ) = alpha_i * r ;
158- }
55+ long double safmax = 1. /safmin ;
56+ #if defined DOUBLE
57+ long double rtmax = safmax /DBL_EPSILON ;
58+ #else
59+ long double rtmax = safmax /FLT_EPSILON ;
15960#endif
160-
161- FUNCTION_PROFILE_END (4 , 4 , 4 );
162-
163- IDEBUG_END ;
164-
165- return ;
61+ * (S1 + 0 ) = * (DB + 0 );
62+ * (S1 + 1 ) = * (DB + 1 ) * -1 ;
63+ if (da_r == ZERO && da_i == ZERO ) {
64+ * C = ZERO ;
65+ if (db_r == ZERO ) {
66+ (* DA ) = fabsl (db_i );
67+ * S = * S1 /da_r ;
68+ * (S + 1 ) = * (S1 + 1 ) /da_r ;
69+ return ;
70+ } else if ( db_i == ZERO ) {
71+ * DA = fabsl (db_r );
72+ * S = * S1 /da_r ;
73+ * (S + 1 ) = * (S1 + 1 ) /da_r ;
74+ return ;
75+ } else {
76+ long double g1 = MAX ( fabsl (db_r ), fabsl (db_i ));
77+ rtmax = sqrt (safmax /2. );
78+ if (g1 > rtmin && g1 < rtmax ) { // unscaled
79+ d = sqrt (adb );
80+ * S = * S1 /d ;
81+ * (S + 1 ) = * (S1 + 1 ) /d ;
82+ * DA = d ;
83+ * (DA + 1 ) = ZERO ;
84+ return ;
85+ } else { // scaled algorithm
86+ long double u = MIN ( safmax , MAX ( safmin , g1 ));
87+ FLOAT gs_r = db_r /u ;
88+ FLOAT gs_i = db_i /u ;
89+ d = sqrt ( gs_r * gs_r + gs_i * gs_i );
90+ * S = gs_r / d ;
91+ * (S + 1 ) = (gs_i * -1 ) / d ;
92+ * DA = d * u ;
93+ * (DA + 1 ) = ZERO ;
94+ return ;
95+ }
96+ }
97+ } else {
98+ FLOAT f1 = MAX ( fabsl (da_r ), fabsl (da_i ));
99+ FLOAT g1 = MAX ( fabsl (db_r ), fabsl (db_i ));
100+ rtmax = sqrt (safmax / 4. );
101+ if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax ) { //unscaled
102+ long double h = ada + adb ;
103+ double adahsq = sqrt (ada * h );
104+ if (ada >= h * safmin ) {
105+ * C = sqrt (ada /h );
106+ * R = * DA / * C ;
107+ * (R + 1 ) = * (DA + 1 ) / * (C + 1 );
108+ rtmax *= 2. ;
109+ if ( ada > rtmin && h < rtmax ) { // no risk of intermediate overflow
110+ * S = * S1 * (* DA / adahsq ) - * (S1 + 1 )* (* (DA + 1 )/adahsq );
111+ * (S + 1 ) = * S1 * (* (DA + 1 ) / adahsq ) + * (S1 + 1 ) * (* DA /adahsq );
112+ } else {
113+ * S = * S1 * (* R /h ) - * (S1 + 1 ) * (* (R + 1 )/h );
114+ * (S + 1 ) = * S1 * (* (R + 1 )/h ) + * (S1 + 1 ) * (* (R )/h );
115+ }
116+ } else {
117+ * C = ada / adahsq ;
118+ if (* C >= safmin )
119+ * R = * DA / * C ;
120+ else
121+ * R = * DA * (h / adahsq );
122+ * S = * S1 * ada / adahsq ;
123+ * (S + 1 ) = * (S1 + 1 ) * ada / adahsq ;
124+ }
125+ * DA = * R ;
126+ * (DA + 1 )= * (R + 1 );
127+ return ;
128+ } else { // scaled
129+ FLOAT fs_r , fs_i , gs_r , gs_i ;
130+ long double v ,w ,f2 ,g2 ,h ;
131+ long double u = MIN ( safmax , MAX ( safmin , MAX (f1 ,g1 )));
132+ gs_r = db_r /u ;
133+ gs_i = db_i /u ;
134+ g2 = sqrt ( gs_r * gs_r + gs_i * gs_i );
135+ if (f1 /u < rtmin ) {
136+ v = MIN (safmax , MAX (safmin , f1 ));
137+ w = v / u ;
138+ fs_r = * DA / v ;
139+ fs_i = * (DA + 1 ) / v ;
140+ f2 = sqrt ( fs_r * fs_r + fs_i * fs_i );
141+ h = f2 * w * w + g2 ;
142+ } else { // use same scaling for both
143+ w = 1. ;
144+ fs_r = * DA / u ;
145+ fs_i = * (DA + 1 ) / u ;
146+ f2 = sqrt ( fs_r * fs_r + fs_i * fs_i );
147+ h = f2 + g2 ;
148+ }
149+ if ( f2 >= h * safmin ) {
150+ * C = sqrt ( f2 / h );
151+ * DA = fs_r / * C ;
152+ * (DA + 1 ) = fs_i / * C ;
153+ rtmax *= 2 ;
154+ if ( f2 > rtmin && h < rtmax ) {
155+ * S = gs_r * (fs_r /sqrt (f2 * h )) - gs_i * (fs_i / sqrt (f2 * h ));
156+ * (S + 1 ) = gs_r * (fs_i /sqrt (f2 * h )) + gs_i * -1. * (fs_r / sqrt (f2 * h ));
157+ } else {
158+ * S = gs_r * (* DA /h ) - gs_i * (* (DA + 1 ) / h );
159+ * (S + 1 ) = gs_r * (* (DA + 1 ) /h ) + gs_i * -1. * (* DA / h );
160+ }
161+ } else { // intermediates might overflow
162+ d = sqrt ( f2 * h );
163+ * C = f2 /d ;
164+ if (* C >= safmin ) {
165+ * DA = fs_r / * C ;
166+ * (DA + 1 ) = fs_i / * C ;
167+ } else {
168+ * DA = fs_r * (h / d );
169+ * (DA + 1 ) = fs_i / (h / d );
170+ }
171+ * S = gs_r * (fs_r /d ) - gs_i * (fs_i / d );
172+ * (S + 1 ) = gs_r * (fs_i /d ) + gs_i * -1. * (fs_r / d );
173+ }
174+ * C *= w ;
175+ * DA *= u ;
176+ * (DA + 1 ) *= u ;
177+ return ;
178+ }
179+ }
166180}
181+
0 commit comments