|
537 | 537 | add $7*16, KEY |
538 | 538 | .else |
539 | 539 | add $(15+7)*16, KEY |
540 | | -.endif |
541 | 540 |
|
542 | | - // Check whether the data length is a multiple of the AES block length. |
| 541 | + // When decrypting a message whose length isn't a multiple of the AES |
| 542 | + // block length, exclude the last full block from the main loop by |
| 543 | + // subtracting 16 from LEN. This is needed because ciphertext stealing |
| 544 | + // decryption uses the last two tweaks in reverse order. We'll handle |
| 545 | + // the last full block and the partial block specially at the end. |
543 | 546 | test $15, LEN |
544 | | - jnz .Lneed_cts\@ |
| 547 | + jnz .Lneed_cts_dec\@ |
545 | 548 | .Lxts_init\@: |
| 549 | +.endif |
546 | 550 |
|
547 | 551 | // Cache as many round keys as possible. |
548 | 552 | _load_round_keys |
|
685 | 689 | _vaes_4x \enc, 1, 12 |
686 | 690 | jmp .Lencrypt_4x_done\@ |
687 | 691 |
|
688 | | -.Lneed_cts\@: |
689 | | - // The data length isn't a multiple of the AES block length, so |
690 | | - // ciphertext stealing (CTS) will be needed. Subtract one block from |
691 | | - // LEN so that the main loop doesn't process the last full block. The |
692 | | - // CTS step will process it specially along with the partial block. |
| 692 | +.if !\enc |
| 693 | +.Lneed_cts_dec\@: |
693 | 694 | sub $16, LEN |
694 | 695 | jmp .Lxts_init\@ |
| 696 | +.endif |
695 | 697 |
|
696 | 698 | .Lcts\@: |
697 | 699 | // Do ciphertext stealing (CTS) to en/decrypt the last full block and |
698 | | - // the partial block. CTS needs two tweaks. TWEAK0_XMM contains the |
699 | | - // next tweak; compute the one after that. Decryption uses these two |
700 | | - // tweaks in reverse order, so also define aliases to handle that. |
701 | | - _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM |
| 700 | + // the partial block. TWEAK0_XMM contains the next tweak. |
| 701 | + |
702 | 702 | .if \enc |
703 | | - .set CTS_TWEAK0, TWEAK0_XMM |
704 | | - .set CTS_TWEAK1, TWEAK1_XMM |
| 703 | + // If encrypting, the main loop already encrypted the last full block to |
| 704 | + // create the CTS intermediate ciphertext. Prepare for the rest of CTS |
| 705 | + // by rewinding the pointers and loading the intermediate ciphertext. |
| 706 | + sub $16, SRC |
| 707 | + sub $16, DST |
| 708 | + vmovdqu (DST), %xmm0 |
705 | 709 | .else |
706 | | - .set CTS_TWEAK0, TWEAK1_XMM |
707 | | - .set CTS_TWEAK1, TWEAK0_XMM |
708 | | -.endif |
709 | | - |
710 | | - // En/decrypt the last full block. |
| 710 | + // If decrypting, the main loop didn't decrypt the last full block |
| 711 | + // because CTS decryption uses the last two tweaks in reverse order. |
| 712 | + // Do it now by advancing the tweak and decrypting the last full block. |
| 713 | + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM |
711 | 714 | vmovdqu (SRC), %xmm0 |
712 | | - _aes_crypt \enc, _XMM, CTS_TWEAK0, %xmm0 |
| 715 | + _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 |
| 716 | +.endif |
713 | 717 |
|
714 | 718 | .if USE_AVX10 |
715 | 719 | // Create a mask that has the first LEN bits set. |
716 | 720 | mov $-1, %rax |
717 | 721 | bzhi LEN, %rax, %rax |
718 | 722 | kmovq %rax, %k1 |
719 | 723 |
|
720 | | - // Swap the first LEN bytes of the above result with the partial block. |
721 | | - // Note that to support in-place en/decryption, the load from the src |
722 | | - // partial block must happen before the store to the dst partial block. |
| 724 | + // Swap the first LEN bytes of the en/decryption of the last full block |
| 725 | + // with the partial block. Note that to support in-place en/decryption, |
| 726 | + // the load from the src partial block must happen before the store to |
| 727 | + // the dst partial block. |
723 | 728 | vmovdqa %xmm0, %xmm1 |
724 | 729 | vmovdqu8 16(SRC), %xmm0{%k1} |
725 | 730 | vmovdqu8 %xmm1, 16(DST){%k1} |
|
750 | 755 | vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 |
751 | 756 | .endif |
752 | 757 | // En/decrypt again and store the last full block. |
753 | | - _aes_crypt \enc, _XMM, CTS_TWEAK1, %xmm0 |
| 758 | + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 |
754 | 759 | vmovdqu %xmm0, (DST) |
755 | 760 | jmp .Ldone\@ |
756 | 761 | .endm |
|
0 commit comments