Skip to content

Commit 8abec36

Browse files
Ignat Korchagin authored and snitm committed
dm crypt: do not wait for backlogged crypto request completion in softirq
Commit 39d42fa ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context.

When Crypto API backlogs a crypto request, dm-crypt uses wait_for_completion to avoid sending further requests to an already overloaded crypto driver. However, if the code is executing in softirq context, we might get the following stacktrace:

[ 210.235213][ C0] BUG: scheduling while atomic: fio/2602/0x00000102
[ 210.236701][ C0] Modules linked in:
[ 210.237566][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50
[ 210.239292][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[ 210.241233][ C0] Call Trace:
[ 210.241946][ C0] <IRQ>
[ 210.242561][ C0] dump_stack+0x7d/0xa3
[ 210.243466][ C0] __schedule_bug.cold+0xb3/0xc2
[ 210.244539][ C0] __schedule+0x156f/0x20d0
[ 210.245518][ C0] ? io_schedule_timeout+0x140/0x140
[ 210.246660][ C0] schedule+0xd0/0x270
[ 210.247541][ C0] schedule_timeout+0x1fb/0x280
[ 210.248586][ C0] ? usleep_range+0x150/0x150
[ 210.249624][ C0] ? unpoison_range+0x3a/0x60
[ 210.250632][ C0] ? ____kasan_kmalloc.constprop.0+0x82/0xa0
[ 210.251949][ C0] ? unpoison_range+0x3a/0x60
[ 210.252958][ C0] ? __prepare_to_swait+0xa7/0x190
[ 210.254067][ C0] do_wait_for_common+0x2ab/0x370
[ 210.255158][ C0] ? usleep_range+0x150/0x150
[ 210.256192][ C0] ? bit_wait_io_timeout+0x160/0x160
[ 210.257358][ C0] ? blk_update_request+0x757/0x1150
[ 210.258582][ C0] ? _raw_spin_lock_irq+0x82/0xd0
[ 210.259674][ C0] ? _raw_read_unlock_irqrestore+0x30/0x30
[ 210.260917][ C0] wait_for_completion+0x4c/0x90
[ 210.261971][ C0] crypt_convert+0x19a6/0x4c00
[ 210.263033][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0
[ 210.264193][ C0] ? kasan_set_track+0x1c/0x30
[ 210.265191][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0
[ 210.266283][ C0] ? kmem_cache_free+0x104/0x470
[ 210.267363][ C0] ? crypt_endio+0x91/0x180
[ 210.268327][ C0] kcryptd_crypt_read_convert+0x30e/0x420
[ 210.269565][ C0] blk_update_request+0x757/0x1150
[ 210.270563][ C0] blk_mq_end_request+0x4b/0x480
[ 210.271680][ C0] blk_done_softirq+0x21d/0x340
[ 210.272775][ C0] ? _raw_spin_lock+0x81/0xd0
[ 210.273847][ C0] ? blk_mq_stop_hw_queue+0x30/0x30
[ 210.275031][ C0] ? _raw_read_lock_irq+0x40/0x40
[ 210.276182][ C0] __do_softirq+0x190/0x611
[ 210.277203][ C0] ? handle_edge_irq+0x221/0xb60
[ 210.278340][ C0] asm_call_irq_on_stack+0x12/0x20
[ 210.279514][ C0] </IRQ>
[ 210.280164][ C0] do_softirq_own_stack+0x37/0x40
[ 210.281281][ C0] irq_exit_rcu+0x110/0x1b0
[ 210.282286][ C0] common_interrupt+0x74/0x120
[ 210.283376][ C0] asm_common_interrupt+0x1e/0x40
[ 210.284496][ C0] RIP: 0010:_aesni_enc1+0x65/0xb0

Fix this by making crypt_convert function reentrant from the point of a single bio and make dm-crypt defer further bio processing to a workqueue, if Crypto API backlogs a request in interrupt context.

Fixes: 39d42fa ("dm crypt: add flags to optionally bypass kcryptd workqueues")
Cc: stable@vger.kernel.org # v5.9+
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Acked-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
1 parent b690bd5 commit 8abec36

1 file changed

Lines changed: 98 additions & 5 deletions

File tree

drivers/md/dm-crypt.c

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,13 +1529,19 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
15291529
* Encrypt / decrypt data from one bio to another one (can be the same one)
15301530
*/
15311531
static blk_status_t crypt_convert(struct crypt_config *cc,
1532-
struct convert_context *ctx, bool atomic)
1532+
struct convert_context *ctx, bool atomic, bool reset_pending)
15331533
{
15341534
unsigned int tag_offset = 0;
15351535
unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
15361536
int r;
15371537

1538-
atomic_set(&ctx->cc_pending, 1);
1538+
/*
1539+
* if reset_pending is set we are dealing with the bio for the first time,
1540+
* else we're continuing to work on the previous bio, so don't mess with
1541+
* the cc_pending counter
1542+
*/
1543+
if (reset_pending)
1544+
atomic_set(&ctx->cc_pending, 1);
15391545

15401546
while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
15411547

@@ -1553,7 +1559,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
15531559
* but the driver request queue is full, let's wait.
15541560
*/
15551561
case -EBUSY:
1556-
wait_for_completion(&ctx->restart);
1562+
if (in_interrupt()) {
1563+
if (try_wait_for_completion(&ctx->restart)) {
1564+
/*
1565+
* we don't have to block to wait for completion,
1566+
* so proceed
1567+
*/
1568+
} else {
1569+
/*
1570+
* we can't wait for completion without blocking
1571+
* exit and continue processing in a workqueue
1572+
*/
1573+
ctx->r.req = NULL;
1574+
ctx->cc_sector += sector_step;
1575+
tag_offset++;
1576+
return BLK_STS_DEV_RESOURCE;
1577+
}
1578+
} else {
1579+
wait_for_completion(&ctx->restart);
1580+
}
15571581
reinit_completion(&ctx->restart);
15581582
fallthrough;
15591583
/*
@@ -1945,6 +1969,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
19451969
}
19461970
}
19471971

1972+
static void kcryptd_crypt_write_continue(struct work_struct *work)
1973+
{
1974+
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
1975+
struct crypt_config *cc = io->cc;
1976+
struct convert_context *ctx = &io->ctx;
1977+
int crypt_finished;
1978+
sector_t sector = io->sector;
1979+
blk_status_t r;
1980+
1981+
wait_for_completion(&ctx->restart);
1982+
reinit_completion(&ctx->restart);
1983+
1984+
r = crypt_convert(cc, &io->ctx, true, false);
1985+
if (r)
1986+
io->error = r;
1987+
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
1988+
if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
1989+
/* Wait for completion signaled by kcryptd_async_done() */
1990+
wait_for_completion(&ctx->restart);
1991+
crypt_finished = 1;
1992+
}
1993+
1994+
/* Encryption was already finished, submit io now */
1995+
if (crypt_finished) {
1996+
kcryptd_crypt_write_io_submit(io, 0);
1997+
io->sector = sector;
1998+
}
1999+
2000+
crypt_dec_pending(io);
2001+
}
2002+
19482003
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
19492004
{
19502005
struct crypt_config *cc = io->cc;
@@ -1973,7 +2028,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
19732028

19742029
crypt_inc_pending(io);
19752030
r = crypt_convert(cc, ctx,
1976-
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
2031+
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
2032+
/*
2033+
* Crypto API backlogged the request, because its queue was full
2034+
* and we're in softirq context, so continue from a workqueue
2035+
* (TODO: is it actually possible to be in softirq in the write path?)
2036+
*/
2037+
if (r == BLK_STS_DEV_RESOURCE) {
2038+
INIT_WORK(&io->work, kcryptd_crypt_write_continue);
2039+
queue_work(cc->crypt_queue, &io->work);
2040+
return;
2041+
}
19772042
if (r)
19782043
io->error = r;
19792044
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -1998,6 +2063,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
19982063
crypt_dec_pending(io);
19992064
}
20002065

2066+
static void kcryptd_crypt_read_continue(struct work_struct *work)
2067+
{
2068+
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
2069+
struct crypt_config *cc = io->cc;
2070+
blk_status_t r;
2071+
2072+
wait_for_completion(&io->ctx.restart);
2073+
reinit_completion(&io->ctx.restart);
2074+
2075+
r = crypt_convert(cc, &io->ctx, true, false);
2076+
if (r)
2077+
io->error = r;
2078+
2079+
if (atomic_dec_and_test(&io->ctx.cc_pending))
2080+
kcryptd_crypt_read_done(io);
2081+
2082+
crypt_dec_pending(io);
2083+
}
2084+
20012085
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
20022086
{
20032087
struct crypt_config *cc = io->cc;
@@ -2009,7 +2093,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
20092093
io->sector);
20102094

20112095
r = crypt_convert(cc, &io->ctx,
2012-
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
2096+
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
2097+
/*
2098+
* Crypto API backlogged the request, because its queue was full
2099+
* and we're in softirq context, so continue from a workqueue
2100+
*/
2101+
if (r == BLK_STS_DEV_RESOURCE) {
2102+
INIT_WORK(&io->work, kcryptd_crypt_read_continue);
2103+
queue_work(cc->crypt_queue, &io->work);
2104+
return;
2105+
}
20132106
if (r)
20142107
io->error = r;
20152108

0 commit comments

Comments
 (0)