Skip to content

Commit 23f57ed

Browse files
Eric Biggers authored and Mikulas Patocka committed
dm-verity: use 2-way interleaved SHA-256 hashing when supported
When the crypto library provides an optimized implementation of sha256_finup_2x(), use it to interleave the hashing of pairs of data blocks. On some CPUs this nearly doubles hashing performance. The increase in overall throughput of cold-cache dm-verity reads that I'm seeing on arm64 and x86_64 is roughly 35% (though this metric is hard to measure as it jumps around a lot).

For now this is done only on data blocks, not Merkle tree blocks. We could use sha256_finup_2x() on Merkle tree blocks too, but that is less important as there aren't as many Merkle tree blocks as data blocks, and that would require some additional code restructuring.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
1 parent 379475d commit 23f57ed

2 files changed

Lines changed: 103 additions & 41 deletions

File tree

drivers/md/dm-verity-target.c

Lines changed: 86 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -417,9 +417,12 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
417417
static int verity_handle_data_hash_mismatch(struct dm_verity *v,
418418
struct dm_verity_io *io,
419419
struct bio *bio,
420-
const u8 *want_digest,
421-
sector_t blkno, u8 *data)
420+
struct pending_block *block)
422421
{
422+
const u8 *want_digest = block->want_digest;
423+
sector_t blkno = block->blkno;
424+
u8 *data = block->data;
425+
423426
if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
424427
/*
425428
* Error handling code (FEC included) cannot be run in the
@@ -448,17 +451,73 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
448451
return 0;
449452
}
450453

454+
static void verity_clear_pending_blocks(struct dm_verity_io *io)
455+
{
456+
int i;
457+
458+
for (i = io->num_pending - 1; i >= 0; i--) {
459+
kunmap_local(io->pending_blocks[i].data);
460+
io->pending_blocks[i].data = NULL;
461+
}
462+
io->num_pending = 0;
463+
}
464+
465+
static int verity_verify_pending_blocks(struct dm_verity *v,
466+
struct dm_verity_io *io,
467+
struct bio *bio)
468+
{
469+
const unsigned int block_size = 1 << v->data_dev_block_bits;
470+
int i, r;
471+
472+
if (io->num_pending == 2) {
473+
/* num_pending == 2 implies that the algorithm is SHA-256 */
474+
sha256_finup_2x(v->initial_hashstate.sha256,
475+
io->pending_blocks[0].data,
476+
io->pending_blocks[1].data, block_size,
477+
io->pending_blocks[0].real_digest,
478+
io->pending_blocks[1].real_digest);
479+
} else {
480+
for (i = 0; i < io->num_pending; i++) {
481+
r = verity_hash(v, io, io->pending_blocks[i].data,
482+
block_size,
483+
io->pending_blocks[i].real_digest);
484+
if (unlikely(r))
485+
return r;
486+
}
487+
}
488+
489+
for (i = 0; i < io->num_pending; i++) {
490+
struct pending_block *block = &io->pending_blocks[i];
491+
492+
if (likely(memcmp(block->real_digest, block->want_digest,
493+
v->digest_size) == 0)) {
494+
if (v->validated_blocks)
495+
set_bit(block->blkno, v->validated_blocks);
496+
} else {
497+
r = verity_handle_data_hash_mismatch(v, io, bio, block);
498+
if (unlikely(r))
499+
return r;
500+
}
501+
}
502+
verity_clear_pending_blocks(io);
503+
return 0;
504+
}
505+
451506
/*
452507
* Verify one "dm_verity_io" structure.
453508
*/
454509
static int verity_verify_io(struct dm_verity_io *io)
455510
{
456511
struct dm_verity *v = io->v;
457512
const unsigned int block_size = 1 << v->data_dev_block_bits;
513+
const int max_pending = v->use_sha256_finup_2x ? 2 : 1;
458514
struct bvec_iter iter_copy;
459515
struct bvec_iter *iter;
460516
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
461517
unsigned int b;
518+
int r;
519+
520+
io->num_pending = 0;
462521

463522
if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
464523
/*
@@ -472,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io)
472531

473532
for (b = 0; b < io->n_blocks;
474533
b++, bio_advance_iter(bio, iter, block_size)) {
475-
int r;
476-
sector_t cur_block = io->block + b;
534+
sector_t blkno = io->block + b;
535+
struct pending_block *block;
477536
bool is_zero;
478537
struct bio_vec bv;
479538
void *data;
480539

481540
if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
482-
likely(test_bit(cur_block, v->validated_blocks)))
541+
likely(test_bit(blkno, v->validated_blocks)))
483542
continue;
484543

485-
r = verity_hash_for_block(v, io, cur_block,
486-
verity_io_want_digest(v, io),
544+
block = &io->pending_blocks[io->num_pending];
545+
546+
r = verity_hash_for_block(v, io, blkno, block->want_digest,
487547
&is_zero);
488548
if (unlikely(r < 0))
489-
return r;
549+
goto error;
490550

491551
bv = bio_iter_iovec(bio, *iter);
492552
if (unlikely(bv.bv_len < block_size)) {
@@ -497,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io)
497557
* data block size to be greater than PAGE_SIZE.
498558
*/
499559
DMERR_LIMIT("unaligned io (data block spans pages)");
500-
return -EIO;
560+
r = -EIO;
561+
goto error;
501562
}
502563

503564
data = bvec_kmap_local(&bv);
@@ -511,30 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io)
511572
kunmap_local(data);
512573
continue;
513574
}
514-
515-
r = verity_hash(v, io, data, block_size,
516-
verity_io_real_digest(v, io));
517-
if (unlikely(r < 0)) {
518-
kunmap_local(data);
519-
return r;
575+
block->data = data;
576+
block->blkno = blkno;
577+
if (++io->num_pending == max_pending) {
578+
r = verity_verify_pending_blocks(v, io, bio);
579+
if (unlikely(r))
580+
goto error;
520581
}
582+
}
521583

522-
if (likely(memcmp(verity_io_real_digest(v, io),
523-
verity_io_want_digest(v, io), v->digest_size) == 0)) {
524-
if (v->validated_blocks)
525-
set_bit(cur_block, v->validated_blocks);
526-
kunmap_local(data);
527-
continue;
528-
}
529-
r = verity_handle_data_hash_mismatch(v, io, bio,
530-
verity_io_want_digest(v, io),
531-
cur_block, data);
532-
kunmap_local(data);
584+
if (io->num_pending) {
585+
r = verity_verify_pending_blocks(v, io, bio);
533586
if (unlikely(r))
534-
return r;
587+
goto error;
535588
}
536589

537590
return 0;
591+
592+
error:
593+
verity_clear_pending_blocks(io);
594+
return r;
538595
}
539596

540597
/*
@@ -1277,6 +1334,8 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
12771334
* interleaved hashing support.
12781335
*/
12791336
v->use_sha256_lib = true;
1337+
if (sha256_finup_2x_is_optimized())
1338+
v->use_sha256_finup_2x = true;
12801339
ti->per_io_data_size =
12811340
offsetofend(struct dm_verity_io, hash_ctx.sha256);
12821341
} else {

drivers/md/dm-verity.h

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ struct dm_verity {
6464
bool hash_failed:1; /* set if hash of any block failed */
6565
bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
6666
bool use_sha256_lib:1; /* use SHA-256 library instead of generic crypto API */
67+
bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */
6768
unsigned int digest_size; /* digest size for the current hash algorithm */
6869
enum verity_mode mode; /* mode for handling verification errors */
6970
enum verity_mode error_mode;/* mode for handling I/O errors */
@@ -83,6 +84,13 @@ struct dm_verity {
8384
mempool_t recheck_pool;
8485
};
8586

87+
struct pending_block {
88+
void *data;
89+
sector_t blkno;
90+
u8 want_digest[HASH_MAX_DIGESTSIZE];
91+
u8 real_digest[HASH_MAX_DIGESTSIZE];
92+
};
93+
8694
struct dm_verity_io {
8795
struct dm_verity *v;
8896

@@ -100,8 +108,15 @@ struct dm_verity_io {
100108
struct work_struct bh_work;
101109

102110
u8 tmp_digest[HASH_MAX_DIGESTSIZE];
103-
u8 real_digest[HASH_MAX_DIGESTSIZE];
104-
u8 want_digest[HASH_MAX_DIGESTSIZE];
111+
112+
/*
113+
* This is the queue of data blocks that are pending verification. When
114+
* the crypto layer supports interleaved hashing, we allow multiple
115+
* blocks to be queued up in order to utilize it. This can improve
116+
* performance significantly vs. sequential hashing of each block.
117+
*/
118+
int num_pending;
119+
struct pending_block pending_blocks[2];
105120

106121
/*
107122
* Temporary space for hashing. Either sha256 or shash is used,
@@ -116,18 +131,6 @@ struct dm_verity_io {
116131
} hash_ctx;
117132
};
118133

119-
static inline u8 *verity_io_real_digest(struct dm_verity *v,
120-
struct dm_verity_io *io)
121-
{
122-
return io->real_digest;
123-
}
124-
125-
static inline u8 *verity_io_want_digest(struct dm_verity *v,
126-
struct dm_verity_io *io)
127-
{
128-
return io->want_digest;
129-
}
130-
131134
extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
132135
const u8 *data, size_t len, u8 *digest);
133136

0 commit comments

Comments
 (0)