|
452 | 452 |
|
453 | 453 | MOVDDUP(4 * SIZE, A1, a1) |
454 | 454 |
|
455 | | - movsd 0 * SIZE(YY), yy1 |
456 | | - movhpd 1 * SIZE(YY), yy1 |
457 | | - movsd 2 * SIZE(YY), yy2 |
458 | | - movhpd 3 * SIZE(YY), yy2 |
459 | | - |
460 | 455 | movapd 8 * SIZE(XX), xtemp1 |
461 | 456 | movapd 10 * SIZE(XX), xtemp2 |
462 | 457 | movapd 12 * SIZE(XX), xtemp3 |
|
475 | 470 | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) |
476 | 471 | ALIGN_3 |
477 | 472 |
|
| 473 | +.L12_prep: |
| 474 | + movsd 0 * SIZE(YY), yy1 |
| 475 | + movhpd 1 * SIZE(YY), yy1 |
| 476 | + movsd 2 * SIZE(YY), yy2 |
| 477 | + movhpd 3 * SIZE(YY), yy2 |
| 478 | + |
478 | 479 | .L12: |
479 | 480 | movapd xtemp1, xt1 |
480 | 481 | mulpd a1, xt1 |
|
608 | 609 |
|
609 | 610 | movlpd yy2, 6 * SIZE(YY) |
610 | 611 | movhpd yy2, 7 * SIZE(YY) |
611 | | - movsd 10 * SIZE(YY), yy2 |
612 | | - movhpd 11 * SIZE(YY), yy2 |
613 | 612 |
|
614 | 613 | movapd xtemp2, xt1 |
615 | 614 | movapd 18 * SIZE(XX), xtemp2 |
|
621 | 620 |
|
622 | 621 | movlpd yy1, 4 * SIZE(YY) |
623 | 622 | movhpd yy1, 5 * SIZE(YY) |
624 | | - movsd 8 * SIZE(YY), yy1 |
625 | | - movhpd 9 * SIZE(YY), yy1 |
626 | 623 |
|
627 | 624 | subq $-16 * SIZE, XX |
628 | 625 | addq $ 8 * SIZE, YY |
629 | 626 | addq $ 8 * SIZE, A1 |
630 | 627 | addq $ 8 * SIZE, A2 |
631 | 628 |
|
632 | 629 | decq I |
633 | | - jg .L12 |
| 630 | + jg .L12_prep |
| 631 | + jmp .L15 |
634 | 632 | ALIGN_3 |
635 | 633 |
|
636 | 634 | .L14: |
|
641 | 639 | jle .L16 |
642 | 640 |
|
643 | 641 | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) |
644 | | - jmp .L15_pastcheck |
645 | 642 |
|
646 | 643 | .L15: |
647 | 644 | movq M, I |
|
650 | 647 | testq $2, I |
651 | 648 | jle .L16 |
652 | 649 |
|
| 650 | + movsd 0 * SIZE(YY), yy1 |
| 651 | + movhpd 1 * SIZE(YY), yy1 |
| 652 | + movsd 2 * SIZE(YY), yy2 |
| 653 | + movhpd 3 * SIZE(YY), yy2 |
| 654 | + |
653 | 655 | .L15_pastcheck: |
654 | 656 | movapd xtemp1, xt1 |
655 | 657 | mulpd a1, xt1 |
|
705 | 707 |
|
706 | 708 | movlpd yy2, 2 * SIZE(YY) |
707 | 709 | movhpd yy2, 3 * SIZE(YY) |
708 | | - movsd 6 * SIZE(YY), yy2 |
709 | | - movhpd 7 * SIZE(YY), yy2 |
710 | 710 |
|
711 | 711 | movapd xtemp2, xt1 |
712 | 712 | movapd 10 * SIZE(XX), xtemp2 |
|
717 | 717 |
|
718 | 718 | movlpd yy1, 0 * SIZE(YY) |
719 | 719 | movhpd yy1, 1 * SIZE(YY) |
720 | | - movsd 4 * SIZE(YY), yy1 |
721 | | - movhpd 5 * SIZE(YY), yy1 |
722 | 720 |
|
723 | 721 | addq $4 * SIZE, YY |
724 | 722 | addq $4 * SIZE, A1 |
|
731 | 729 |
|
732 | 730 | MOVDDUP(1 * SIZE, A1, a2) |
733 | 731 |
|
| 732 | + movsd 0 * SIZE(YY), yy1 |
| 733 | + movhpd 1 * SIZE(YY), yy1 |
| 734 | + |
734 | 735 | movapd xtemp1, xt1 |
735 | 736 | mulpd a1, xt1 |
736 | 737 | mulpd atemp1, a1 |
|
0 commit comments