@@ -245,52 +245,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
245245 movdqu 0x30 (STATEP), STATE3
246246 movdqu 0x40 (STATEP), STATE4
247247
248- mov SRC, %r8
249- and $0xF , %r8
250- jnz .Lad_u_loop
251-
252- .align 8
253- .Lad_a_loop:
254- movdqa 0x00 (SRC), MSG
255- aegis128_update
256- pxor MSG, STATE4
257- sub $0x10 , LEN
258- cmp $0x10 , LEN
259- jl .Lad_out_1
260-
261- movdqa 0x10 (SRC), MSG
262- aegis128_update
263- pxor MSG, STATE3
264- sub $0x10 , LEN
265- cmp $0x10 , LEN
266- jl .Lad_out_2
267-
268- movdqa 0x20 (SRC), MSG
269- aegis128_update
270- pxor MSG, STATE2
271- sub $0x10 , LEN
272- cmp $0x10 , LEN
273- jl .Lad_out_3
274-
275- movdqa 0x30 (SRC), MSG
276- aegis128_update
277- pxor MSG, STATE1
278- sub $0x10 , LEN
279- cmp $0x10 , LEN
280- jl .Lad_out_4
281-
282- movdqa 0x40 (SRC), MSG
283- aegis128_update
284- pxor MSG, STATE0
285- sub $0x10 , LEN
286- cmp $0x10 , LEN
287- jl .Lad_out_0
288-
289- add $0x50 , SRC
290- jmp .Lad_a_loop
291-
292248.align 8
293- .Lad_u_loop :
249+ .Lad_loop :
294250 movdqu 0x00 (SRC), MSG
295251 aegis128_update
296252 pxor MSG, STATE4
@@ -327,7 +283,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
327283 jl .Lad_out_0
328284
329285 add $0x50 , SRC
330- jmp .Lad_u_loop
286+ jmp .Lad_loop
331287
332288 /* store the state: */
333289.Lad_out_0:
@@ -380,15 +336,15 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
380336 RET
381337SYM_FUNC_END(crypto_aegis128_aesni_ad)
382338
383- .macro encrypt_block a s0 s1 s2 s3 s4 i
384- movdq \a (\i * 0x10 )(SRC), MSG
339+ .macro encrypt_block s0 s1 s2 s3 s4 i
340+ movdqu (\i * 0x10 )(SRC), MSG
385341 movdqa MSG, T0
386342 pxor \s1, T0
387343 pxor \s4, T0
388344 movdqa \s2, T1
389345 pand \s3, T1
390346 pxor T1, T0
391- movdq \a T0, (\i * 0x10 )(DST)
347+ movdqu T0, (\i * 0x10 )(DST)
392348
393349 aegis128_update
394350 pxor MSG, \s4
@@ -415,34 +371,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
415371 movdqu 0x30 (STATEP), STATE3
416372 movdqu 0x40 (STATEP), STATE4
417373
418- mov SRC, %r8
419- or DST, %r8
420- and $0xF , %r8
421- jnz .Lenc_u_loop
422-
423374.align 8
424- .Lenc_a_loop :
425- encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
426- encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
427- encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
428- encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
429- encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
375+ .Lenc_loop :
376+ encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
377+ encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
378+ encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
379+ encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
380+ encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
430381
431382 add $0x50 , SRC
432383 add $0x50 , DST
433- jmp .Lenc_a_loop
434-
435- .align 8
436- .Lenc_u_loop:
437- encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
438- encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
439- encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
440- encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
441- encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
442-
443- add $0x50 , SRC
444- add $0x50 , DST
445- jmp .Lenc_u_loop
384+ jmp .Lenc_loop
446385
447386 /* store the state: */
448387.Lenc_out_0:
@@ -535,14 +474,14 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
535474 RET
536475SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
537476
538- .macro decrypt_block a s0 s1 s2 s3 s4 i
539- movdq \a (\i * 0x10 )(SRC), MSG
477+ .macro decrypt_block s0 s1 s2 s3 s4 i
478+ movdqu (\i * 0x10 )(SRC), MSG
540479 pxor \s1, MSG
541480 pxor \s4, MSG
542481 movdqa \s2, T1
543482 pand \s3, T1
544483 pxor T1, MSG
545- movdq \a MSG, (\i * 0x10 )(DST)
484+ movdqu MSG, (\i * 0x10 )(DST)
546485
547486 aegis128_update
548487 pxor MSG, \s4
@@ -569,34 +508,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
569508 movdqu 0x30 (STATEP), STATE3
570509 movdqu 0x40 (STATEP), STATE4
571510
572- mov SRC, %r8
573- or DST, %r8
574- and $0xF , %r8
575- jnz .Ldec_u_loop
576-
577- .align 8
578- .Ldec_a_loop:
579- decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
580- decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
581- decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
582- decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
583- decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
584-
585- add $0x50 , SRC
586- add $0x50 , DST
587- jmp .Ldec_a_loop
588-
589511.align 8
590- .Ldec_u_loop :
591- decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
592- decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
593- decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
594- decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
595- decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
512+ .Ldec_loop :
513+ decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
514+ decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
515+ decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
516+ decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
517+ decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
596518
597519 add $0x50 , SRC
598520 add $0x50 , DST
599- jmp .Ldec_u_loop
521+ jmp .Ldec_loop
600522
601523 /* store the state: */
602524.Ldec_out_0:
0 commit comments