Skip to content

Commit 4ca24d6

Browse files
author
Eric Biggers
committed
lib/crypto: sha256: Add support for 2-way interleaved hashing
Many arm64 and x86_64 CPUs can compute two SHA-256 hashes at nearly the same speed as one, if the instructions are interleaved. This is because SHA-256 is serialized block-by-block, and two interleaved hashes take much better advantage of the CPU's instruction-level parallelism. Meanwhile, a very common use case for SHA-256 hashing in the Linux kernel is dm-verity and fs-verity. Both use a Merkle tree that has a fixed block size, usually 4096 bytes with an empty or 32-byte salt prepended. Usually, many blocks need to be hashed at a time. This is an ideal scenario for 2-way interleaved hashing. To enable this optimization, add a new function sha256_finup_2x() to the SHA-256 library API. It computes the hash of two equal-length messages, starting from a common initial context. For now it always falls back to sequential processing. Later patches will wire up arm64 and x86_64 optimized implementations. Note that the interleaving factor could in principle be higher than 2x. However, that runs into many practical difficulties and CPU throughput limitations. Thus, both the implementations I'm adding are 2x. In the interest of using the simplest solution, the API matches that. Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Link: https://lore.kernel.org/r/20250915160819.140019-2-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@kernel.org>
1 parent f0883b9 commit 4ca24d6

2 files changed

Lines changed: 94 additions & 5 deletions

File tree

include/crypto/sha2.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,34 @@ void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]);
375375
*/
376376
void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]);
377377

378+
/**
379+
* sha256_finup_2x() - Compute two SHA-256 digests from a common initial
380+
* context. On some CPUs, this is faster than sequentially
381+
* computing each digest.
382+
* @ctx: an optional initial context, which may have already processed data. If
383+
* NULL, a default initial context is used (equivalent to sha256_init()).
384+
* @data1: data for the first message
385+
* @data2: data for the second message
386+
* @len: the length of each of @data1 and @data2, in bytes
387+
* @out1: (output) the first SHA-256 message digest
388+
* @out2: (output) the second SHA-256 message digest
389+
*
390+
* Context: Any context.
391+
*/
392+
void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1,
393+
const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE],
394+
u8 out2[SHA256_DIGEST_SIZE]);
395+
396+
/**
397+
* sha256_finup_2x_is_optimized() - Check if sha256_finup_2x() is using a real
398+
* interleaved implementation, as opposed to a
399+
* sequential fallback
400+
* @return: true if optimized
401+
*
402+
* Context: Any context.
403+
*/
404+
bool sha256_finup_2x_is_optimized(void);
405+
378406
/**
379407
* struct hmac_sha256_key - Prepared key for HMAC-SHA256
380408
* @key: private

lib/crypto/sha256.c

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,20 @@ static const struct sha256_block_state sha224_iv = {
2525
},
2626
};
2727

28-
static const struct sha256_block_state sha256_iv = {
29-
.h = {
30-
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
31-
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
28+
static const struct sha256_ctx initial_sha256_ctx = {
29+
.ctx = {
30+
.state = {
31+
.h = {
32+
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
33+
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
34+
},
35+
},
36+
.bytecount = 0,
3237
},
3338
};
3439

40+
#define sha256_iv (initial_sha256_ctx.ctx.state)
41+
3542
static const u32 sha256_K[64] = {
3643
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
3744
0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -261,8 +268,62 @@ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE])
261268
}
262269
EXPORT_SYMBOL(sha256);
263270

264-
/* pre-boot environment (as indicated by __DISABLE_EXPORTS) doesn't need HMAC */
271+
/*
272+
* Pre-boot environment (as indicated by __DISABLE_EXPORTS being defined)
273+
* doesn't need either HMAC support or interleaved hashing support
274+
*/
265275
#ifndef __DISABLE_EXPORTS
276+
277+
/*
 * Fallback stubs, compiled only when sha256_finup_2x_arch is not already
 * defined as a macro (presumably by an architecture-specific implementation
 * pulled in earlier; none is visible in this change -- TODO confirm).
 */
#ifndef sha256_finup_2x_arch
278+
/*
 * Stub arch hook: does no hashing and always returns false, which makes
 * sha256_finup_2x() continue on to its sequential fallback.
 */
static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
279+
const u8 *data1, const u8 *data2, size_t len,
280+
u8 out1[SHA256_DIGEST_SIZE],
281+
u8 out2[SHA256_DIGEST_SIZE])
282+
{
283+
return false;
284+
}
285+
/* Stub query: always returns false, i.e. "not optimized". */
static bool sha256_finup_2x_is_optimized_arch(void)
286+
{
287+
return false;
288+
}
289+
#endif
290+
291+
/*
 * Sequential fallback implementation of sha256_finup_2x().
 *
 * Processes @data1 and then @data2 (each @len bytes) one after the other via
 * __sha256_update() and __sha256_final().  Each message starts from its own
 * copy of *@ctx, so the caller's context is never written to.  The results
 * are stored in @out1 and @out2 respectively.
 */
292+
static noinline_for_stack void sha256_finup_2x_sequential(
293+
const struct __sha256_ctx *ctx, const u8 *data1, const u8 *data2,
294+
size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE])
295+
{
296+
struct __sha256_ctx mut_ctx;
297+
298+
/* First message: work on a private, mutable copy of the shared context. */
mut_ctx = *ctx;
299+
__sha256_update(&mut_ctx, data1, len);
300+
__sha256_final(&mut_ctx, out1, SHA256_DIGEST_SIZE);
301+
302+
/* Second message: restart from the caller's context, not from the first result. */
mut_ctx = *ctx;
303+
__sha256_update(&mut_ctx, data2, len);
304+
__sha256_final(&mut_ctx, out2, SHA256_DIGEST_SIZE);
305+
}
306+
307+
/*
 * Compute two digests of equal-length messages from a common starting
 * context.  A NULL @ctx selects initial_sha256_ctx.  The arch hook
 * sha256_finup_2x_arch() is tried first; if it returns false, both digests
 * are produced by sha256_finup_2x_sequential() instead.
 */
void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1,
308+
const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE],
309+
u8 out2[SHA256_DIGEST_SIZE])
310+
{
311+
/* NULL means "start from the default initial context". */
if (ctx == NULL)
312+
ctx = &initial_sha256_ctx;
313+
314+
/* A true return means the arch hook already handled both outputs. */
if (likely(sha256_finup_2x_arch(&ctx->ctx, data1, data2, len, out1,
315+
out2)))
316+
return;
317+
sha256_finup_2x_sequential(&ctx->ctx, data1, data2, len, out1, out2);
318+
}
319+
EXPORT_SYMBOL_GPL(sha256_finup_2x);
320+
321+
/*
 * Exported query that simply forwards the answer from
 * sha256_finup_2x_is_optimized_arch().
 */
bool sha256_finup_2x_is_optimized(void)
322+
{
323+
return sha256_finup_2x_is_optimized_arch();
324+
}
325+
EXPORT_SYMBOL_GPL(sha256_finup_2x_is_optimized);
326+
266327
static void __hmac_sha256_preparekey(struct sha256_block_state *istate,
267328
struct sha256_block_state *ostate,
268329
const u8 *raw_key, size_t raw_key_len,

0 commit comments

Comments
 (0)