@@ -190,10 +190,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
190190 __vector_quad acc0 , acc1 , acc2 , acc3 , acc4 ,acc5 ,acc6 ,acc7 ;
191191 BLASLONG l = 0 ;
192192 vec_t * rowA = (vec_t * ) & AO [0 ];
193- vec_t * rb = (vec_t * ) & BO [0 ];
194193 __vector_pair rowB , rowB1 ;
195- __builtin_vsx_assemble_pair ( & rowB , rb [ 1 ], rb [0 ]);
196- __builtin_vsx_assemble_pair ( & rowB1 , rb [ 3 ], rb [ 2 ] );
194+ rowB = * (( __vector_pair * )(( void * ) & BO [0 ]) );
195+ rowB1 = * (( __vector_pair * )(( void * ) & BO [ 4 ]) );
197196 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
198197 __builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
199198 __builtin_mma_xvf64ger (& acc2 , rowB , rowA [1 ]);
@@ -205,9 +204,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
205204 for (l = 1 ; l < temp ; l ++ )
206205 {
207206 rowA = (vec_t * ) & AO [l << 3 ];
208- rb = (vec_t * ) & BO [l << 3 ];
209- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
210- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
207+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
208+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
211209 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
212210 __builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
213211 __builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [1 ]);
@@ -247,19 +245,17 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
247245 BLASLONG l = 0 ;
248246 vec_t * rowA = (vec_t * ) & AO [0 ];
249247 __vector_pair rowB , rowB1 ;
250- vec_t * rb = (vec_t * ) & BO [0 ];
251- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
252- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
248+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
249+ rowB1 = * ((__vector_pair * )((void * )& BO [4 ]));
253250 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
254251 __builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
255252 __builtin_mma_xvf64ger (& acc2 , rowB , rowA [1 ]);
256253 __builtin_mma_xvf64ger (& acc3 , rowB1 , rowA [1 ]);
257254 for (l = 1 ; l < temp ; l ++ )
258255 {
259256 rowA = (vec_t * ) & AO [l << 2 ];
260- rb = (vec_t * ) & BO [l << 3 ];
261- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
262- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
257+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
258+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
263259 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
264260 __builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
265261 __builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [1 ]);
@@ -291,17 +287,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
291287 BLASLONG l = 0 ;
292288 vec_t * rowA = (vec_t * ) & AO [0 ];
293289 __vector_pair rowB , rowB1 ;
294- vec_t * rb = (vec_t * ) & BO [0 ];
295- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
296- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
290+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
291+ rowB1 = * ((__vector_pair * )((void * )& BO [4 ]));
297292 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
298293 __builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
299294 for (l = 1 ; l < temp ; l ++ )
300295 {
301296 rowA = (vec_t * ) & AO [l << 1 ];
302- rb = (vec_t * ) & BO [l << 3 ];
303- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
304- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
297+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
298+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
305299 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
306300 __builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
307301 }
@@ -403,17 +397,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
403397 BLASLONG l = 0 ;
404398 vec_t * rowA = (vec_t * ) & AO [0 ];
405399 __vector_pair rowB ;
406- vec_t * rb = (vec_t * ) & BO [0 ];
407- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
400+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
408401 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
409402 __builtin_mma_xvf64ger (& acc1 , rowB , rowA [1 ]);
410403 __builtin_mma_xvf64ger (& acc2 , rowB , rowA [2 ]);
411404 __builtin_mma_xvf64ger (& acc3 , rowB , rowA [3 ]);
412405 for (l = 1 ; l < temp ; l ++ )
413406 {
414407 rowA = (vec_t * ) & AO [l << 3 ];
415- rb = (vec_t * ) & BO [l << 2 ];
416- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
408+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
417409 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
418410 __builtin_mma_xvf64gerpp (& acc1 , rowB , rowA [1 ]);
419411 __builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [2 ]);
@@ -445,15 +437,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
445437 BLASLONG l = 0 ;
446438 vec_t * rowA = (vec_t * ) & AO [0 ];
447439 __vector_pair rowB ;
448- vec_t * rb = (vec_t * ) & BO [0 ];
449- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
440+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
450441 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
451442 __builtin_mma_xvf64ger (& acc1 , rowB , rowA [1 ]);
452443 for (l = 1 ; l < temp ; l ++ )
453444 {
454445 rowA = (vec_t * ) & AO [l << 2 ];
455- rb = (vec_t * ) & BO [l << 2 ];
456- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
446+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
457447 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
458448 __builtin_mma_xvf64gerpp (& acc1 , rowB , rowA [1 ]);
459449 }
@@ -481,14 +471,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
481471 BLASLONG l = 0 ;
482472 vec_t * rowA = (vec_t * ) & AO [0 ];
483473 __vector_pair rowB ;
484- vec_t * rb = (vec_t * ) & BO [0 ];
485- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
474+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
486475 __builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
487476 for (l = 1 ; l < temp ; l ++ )
488477 {
489478 rowA = (vec_t * ) & AO [l << 1 ];
490- rb = (vec_t * ) & BO [l << 2 ];
491- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
479+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
492480 __builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
493481 }
494482 SAVE_ACC (& acc0 , 0 );
0 commit comments