|
559 | 559 | .macro _aes_xts_crypt enc |
560 | 560 | _define_aliases |
561 | 561 |
|
562 | | - // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). |
563 | | - movl 480(KEY), KEYLEN |
564 | | - |
565 | 562 | .if !\enc |
566 | 563 | // When decrypting a message whose length isn't a multiple of the AES |
567 | 564 | // block length, exclude the last full block from the main loop by |
568 | 565 | // subtracting 16 from LEN. This is needed because ciphertext stealing |
569 | 566 | // decryption uses the last two tweaks in reverse order. We'll handle |
570 | 567 | // the last full block and the partial block specially at the end. |
| 568 | + lea -16(LEN), %rax |
571 | 569 | test $15, LEN |
572 | | - jnz .Lneed_cts_dec\@ |
573 | | -.Lxts_init\@: |
| 570 | + cmovnz %rax, LEN |
574 | 571 | .endif |
575 | 572 |
|
| 573 | + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). |
| 574 | + movl 480(KEY), KEYLEN |
| 575 | + |
576 | 576 | // Set up the pointer to the round keys and cache as many as possible. |
577 | 577 | _setup_round_keys \enc |
578 | 578 |
|
|
661 | 661 | RET |
662 | 662 |
|
663 | 663 | .Lhandle_remainder\@: |
664 | | - add $4*VL, LEN // Undo the extra sub from earlier. |
665 | 664 |
|
666 | 665 | // En/decrypt any remaining full blocks, one vector at a time. |
667 | 666 | .if VL > 16 |
668 | | - sub $VL, LEN |
| 667 | + add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL. |
669 | 668 | jl .Lvec_at_a_time_done\@ |
670 | 669 | .Lvec_at_a_time\@: |
671 | 670 | _vmovdqu (SRC), V0 |
|
677 | 676 | sub $VL, LEN |
678 | 677 | jge .Lvec_at_a_time\@ |
679 | 678 | .Lvec_at_a_time_done\@: |
680 | | - add $VL-16, LEN // Undo the extra sub from earlier. |
| 679 | + add $VL-16, LEN // Undo extra sub of VL, then sub 16. |
681 | 680 | .else |
682 | | - sub $16, LEN |
| 681 | + add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16. |
683 | 682 | .endif |
684 | 683 |
|
685 | 684 | // En/decrypt any remaining full blocks, one at a time. |
|
694 | 693 | sub $16, LEN |
695 | 694 | jge .Lblock_at_a_time\@ |
696 | 695 | .Lblock_at_a_time_done\@: |
697 | | - add $16, LEN // Undo the extra sub from earlier. |
698 | | - |
699 | | -.Lfull_blocks_done\@: |
700 | | - // Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to |
701 | | - // process the last 16 + LEN bytes. If LEN is zero, we're done. |
702 | | - test LEN, LEN |
703 | | - jnz .Lcts\@ |
704 | | - jmp .Ldone\@ |
705 | | - |
706 | | -.if !\enc |
707 | | -.Lneed_cts_dec\@: |
708 | | - sub $16, LEN |
709 | | - jmp .Lxts_init\@ |
710 | | -.endif |
| 696 | + add $16, LEN // Undo the extra sub of 16. |
| 697 | + // Now 0 <= LEN <= 15. If LEN is zero, we're done. |
| 698 | + jz .Ldone\@ |
711 | 699 |
|
712 | | -.Lcts\@: |
713 | | - // Do ciphertext stealing (CTS) to en/decrypt the last full block and |
714 | | - // the partial block. TWEAK0_XMM contains the next tweak. |
| 700 | + // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN. |
| 701 | + // Do ciphertext stealing to process the last 16 + LEN bytes. |
715 | 702 |
|
716 | 703 | .if \enc |
717 | 704 | // If encrypting, the main loop already encrypted the last full block to |
|
0 commit comments