Skip to content

Commit 42e3376

Browse files
author
Eric Biggers
committed
lib/crypto: x86/sha1-ni: Convert to use rounds macros
The assembly code that does all 80 rounds of SHA-1 is highly repetitive. Replace it with 20 expansions of a macro that does 4 rounds, using the macro arguments and .if directives to handle the slight variations between rounds. This reduces the length of sha1-ni-asm.S by 129 lines while still producing the exact same object file.

This mirrors sha256-ni-asm.S which uses this same strategy.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250718191900.42877-3-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
1 parent f88ed14 commit 42e3376

1 file changed

Lines changed: 29 additions & 158 deletions

File tree

lib/crypto/x86/sha1-ni-asm.S

Lines changed: 29 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,29 @@
7070
#define ABCD_SAVED %xmm8
7171
#define E0_SAVED %xmm9
7272

73+
/*
 * do_4rounds - emit the instructions for SHA-1 rounds \i through \i+3.
 *
 * \i:       number of the first of the four rounds (0, 4, 8, ..., 76).
 * \m0-\m3:  the four message registers.  \m0 supplies the message words
 *           consumed by these rounds; callers rotate the registers by one
 *           position on each successive expansion.
 * \e0:      register fed to sha1rnds4 as the E/message operand for these
 *           rounds.
 * \e1:      register that receives a copy of ABCD taken before these rounds
 *           run; the next expansion passes it as its \e0.
 *
 * The .if conditions on \i drop the message-load and message-schedule
 * instructions from the expansions where the previously unrolled code did
 * not have them.
 */
.macro do_4rounds i, m0, m1, m2, m3, e0, e1
74+
.if \i < 16 /* first 16 rounds take their words straight from the data */
75+
movdqu \i*4(DATA_PTR), \m0 /* 16 bytes at byte offset \i*4 (4 bytes per round) */
76+
pshufb SHUF_MASK, \m0 /* NOTE(review): presumably a byte swap to big-endian words; SHUF_MASK is set up outside this view */
77+
.endif
78+
.if \i == 0
79+
paddd \m0, \e0 /* first group: add the message words to \e0 directly */
80+
.else
81+
sha1nexte \m0, \e0 /* \e0 holds the ABCD copy saved by the previous expansion */
82+
.endif
83+
movdqa ABCD, \e1 /* keep pre-round ABCD for the next expansion's sha1nexte */
84+
.if \i >= 12 && \i < 76
85+
sha1msg2 \m0, \m1 /* message schedule */
86+
.endif
87+
sha1rnds4 $\i / 20, \e0, ABCD /* immediate is 0..3, one value per 20 rounds */
88+
.if \i >= 4 && \i < 68
89+
sha1msg1 \m0, \m3 /* message schedule */
90+
.endif
91+
.if \i >= 8 && \i < 72
92+
pxor \m0, \m2 /* message schedule */
93+
.endif
94+
.endm
95+
7396
/*
7497
* Intel SHA Extensions optimized implementation of a SHA-1 block function
7598
*
@@ -80,9 +103,6 @@
80103
* processes complete blocks. State initialization, buffering of partial
81104
* blocks, and digest finalization are expected to be handled elsewhere.
82105
*
83-
* The indented lines in the loop are instructions related to rounds processing.
84-
* The non-indented lines are instructions related to the message schedule.
85-
*
86106
* void sha1_ni_transform(struct sha1_block_state *state,
87107
* const u8 *data, size_t nblocks)
88108
*/
@@ -102,161 +122,12 @@ SYM_FUNC_START(sha1_ni_transform)
102122
movdqa E0, E0_SAVED
103123
movdqa ABCD, ABCD_SAVED
104124

105-
/* Rounds 0-3 */
106-
movdqu 0*16(DATA_PTR), MSG0
107-
pshufb SHUF_MASK, MSG0
108-
paddd MSG0, E0
109-
movdqa ABCD, E1
110-
sha1rnds4 $0, E0, ABCD
111-
112-
/* Rounds 4-7 */
113-
movdqu 1*16(DATA_PTR), MSG1
114-
pshufb SHUF_MASK, MSG1
115-
sha1nexte MSG1, E1
116-
movdqa ABCD, E0
117-
sha1rnds4 $0, E1, ABCD
118-
sha1msg1 MSG1, MSG0
119-
120-
/* Rounds 8-11 */
121-
movdqu 2*16(DATA_PTR), MSG2
122-
pshufb SHUF_MASK, MSG2
123-
sha1nexte MSG2, E0
124-
movdqa ABCD, E1
125-
sha1rnds4 $0, E0, ABCD
126-
sha1msg1 MSG2, MSG1
127-
pxor MSG2, MSG0
128-
129-
/* Rounds 12-15 */
130-
movdqu 3*16(DATA_PTR), MSG3
131-
pshufb SHUF_MASK, MSG3
132-
sha1nexte MSG3, E1
133-
movdqa ABCD, E0
134-
sha1msg2 MSG3, MSG0
135-
sha1rnds4 $0, E1, ABCD
136-
sha1msg1 MSG3, MSG2
137-
pxor MSG3, MSG1
138-
139-
/* Rounds 16-19 */
140-
sha1nexte MSG0, E0
141-
movdqa ABCD, E1
142-
sha1msg2 MSG0, MSG1
143-
sha1rnds4 $0, E0, ABCD
144-
sha1msg1 MSG0, MSG3
145-
pxor MSG0, MSG2
146-
147-
/* Rounds 20-23 */
148-
sha1nexte MSG1, E1
149-
movdqa ABCD, E0
150-
sha1msg2 MSG1, MSG2
151-
sha1rnds4 $1, E1, ABCD
152-
sha1msg1 MSG1, MSG0
153-
pxor MSG1, MSG3
154-
155-
/* Rounds 24-27 */
156-
sha1nexte MSG2, E0
157-
movdqa ABCD, E1
158-
sha1msg2 MSG2, MSG3
159-
sha1rnds4 $1, E0, ABCD
160-
sha1msg1 MSG2, MSG1
161-
pxor MSG2, MSG0
162-
163-
/* Rounds 28-31 */
164-
sha1nexte MSG3, E1
165-
movdqa ABCD, E0
166-
sha1msg2 MSG3, MSG0
167-
sha1rnds4 $1, E1, ABCD
168-
sha1msg1 MSG3, MSG2
169-
pxor MSG3, MSG1
170-
171-
/* Rounds 32-35 */
172-
sha1nexte MSG0, E0
173-
movdqa ABCD, E1
174-
sha1msg2 MSG0, MSG1
175-
sha1rnds4 $1, E0, ABCD
176-
sha1msg1 MSG0, MSG3
177-
pxor MSG0, MSG2
178-
179-
/* Rounds 36-39 */
180-
sha1nexte MSG1, E1
181-
movdqa ABCD, E0
182-
sha1msg2 MSG1, MSG2
183-
sha1rnds4 $1, E1, ABCD
184-
sha1msg1 MSG1, MSG0
185-
pxor MSG1, MSG3
186-
187-
/* Rounds 40-43 */
188-
sha1nexte MSG2, E0
189-
movdqa ABCD, E1
190-
sha1msg2 MSG2, MSG3
191-
sha1rnds4 $2, E0, ABCD
192-
sha1msg1 MSG2, MSG1
193-
pxor MSG2, MSG0
194-
195-
/* Rounds 44-47 */
196-
sha1nexte MSG3, E1
197-
movdqa ABCD, E0
198-
sha1msg2 MSG3, MSG0
199-
sha1rnds4 $2, E1, ABCD
200-
sha1msg1 MSG3, MSG2
201-
pxor MSG3, MSG1
202-
203-
/* Rounds 48-51 */
204-
sha1nexte MSG0, E0
205-
movdqa ABCD, E1
206-
sha1msg2 MSG0, MSG1
207-
sha1rnds4 $2, E0, ABCD
208-
sha1msg1 MSG0, MSG3
209-
pxor MSG0, MSG2
210-
211-
/* Rounds 52-55 */
212-
sha1nexte MSG1, E1
213-
movdqa ABCD, E0
214-
sha1msg2 MSG1, MSG2
215-
sha1rnds4 $2, E1, ABCD
216-
sha1msg1 MSG1, MSG0
217-
pxor MSG1, MSG3
218-
219-
/* Rounds 56-59 */
220-
sha1nexte MSG2, E0
221-
movdqa ABCD, E1
222-
sha1msg2 MSG2, MSG3
223-
sha1rnds4 $2, E0, ABCD
224-
sha1msg1 MSG2, MSG1
225-
pxor MSG2, MSG0
226-
227-
/* Rounds 60-63 */
228-
sha1nexte MSG3, E1
229-
movdqa ABCD, E0
230-
sha1msg2 MSG3, MSG0
231-
sha1rnds4 $3, E1, ABCD
232-
sha1msg1 MSG3, MSG2
233-
pxor MSG3, MSG1
234-
235-
/* Rounds 64-67 */
236-
sha1nexte MSG0, E0
237-
movdqa ABCD, E1
238-
sha1msg2 MSG0, MSG1
239-
sha1rnds4 $3, E0, ABCD
240-
sha1msg1 MSG0, MSG3
241-
pxor MSG0, MSG2
242-
243-
/* Rounds 68-71 */
244-
sha1nexte MSG1, E1
245-
movdqa ABCD, E0
246-
sha1msg2 MSG1, MSG2
247-
sha1rnds4 $3, E1, ABCD
248-
pxor MSG1, MSG3
249-
250-
/* Rounds 72-75 */
251-
sha1nexte MSG2, E0
252-
movdqa ABCD, E1
253-
sha1msg2 MSG2, MSG3
254-
sha1rnds4 $3, E0, ABCD
255-
256-
/* Rounds 76-79 */
257-
sha1nexte MSG3, E1
258-
movdqa ABCD, E0
259-
sha1rnds4 $3, E1, ABCD
125+
/*
 * Do all 80 rounds as 5 iterations of 4 do_4rounds expansions (4 rounds each).
 * Each expansion rotates MSG0-MSG3 by one position and swaps E0 and E1, so the
 * register assignment repeats every 16 rounds.  The final expansion has E0 in
 * the \e1 position, so E0 receives the last saved copy of ABCD.
 */
.irp i, 0, 16, 32, 48, 64
126+
do_4rounds (\i + 0), MSG0, MSG1, MSG2, MSG3, E0, E1
127+
do_4rounds (\i + 4), MSG1, MSG2, MSG3, MSG0, E1, E0
128+
do_4rounds (\i + 8), MSG2, MSG3, MSG0, MSG1, E0, E1
129+
do_4rounds (\i + 12), MSG3, MSG0, MSG1, MSG2, E1, E0
130+
.endr
260131

261132
/* Add the previous state (before the rounds) to the current state. */
262133
sha1nexte E0_SAVED, E0

0 commit comments

Comments
 (0)