Skip to content

Commit 85ab365

Browse files
author
Kent Overstreet
committed
bcachefs: Fix deadlock in journal write path
bch2_journal_write() was incorrectly waiting on earlier journal writes synchronously; this usually worked because most of the time we'd be running in the context of a thread that did a journal_buf_put(), but sometimes we'd be running out of the same workqueue that completes those prior journal writes. Additionally, this makes sure to punt to a workqueue before submitting preflushes - we really don't want to be calling submit_bio() in the main transaction commit path. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
1 parent adfe935 commit 85ab365

1 file changed

Lines changed: 42 additions & 18 deletions

File tree

fs/bcachefs/journal_io.c

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio)
17231723
percpu_ref_put(&ca->io_ref);
17241724
}
17251725

1726-
static CLOSURE_CALLBACK(do_journal_write)
1726+
static CLOSURE_CALLBACK(journal_write_submit)
17271727
{
17281728
closure_type(w, struct journal_buf, io);
17291729
struct journal *j = container_of(w, struct journal, buf[w->idx]);
@@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write)
17681768
continue_at(cl, journal_write_done, j->wq);
17691769
}
17701770

1771+
static CLOSURE_CALLBACK(journal_write_preflush)
1772+
{
1773+
closure_type(w, struct journal_buf, io);
1774+
struct journal *j = container_of(w, struct journal, buf[w->idx]);
1775+
struct bch_fs *c = container_of(j, struct bch_fs, journal);
1776+
1777+
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
1778+
spin_lock(&j->lock);
1779+
closure_wait(&j->async_wait, cl);
1780+
spin_unlock(&j->lock);
1781+
1782+
continue_at(cl, journal_write_preflush, j->wq);
1783+
return;
1784+
}
1785+
1786+
if (w->separate_flush) {
1787+
for_each_rw_member(c, ca) {
1788+
percpu_ref_get(&ca->io_ref);
1789+
1790+
struct journal_device *ja = &ca->journal;
1791+
struct bio *bio = &ja->bio[w->idx]->bio;
1792+
bio_reset(bio, ca->disk_sb.bdev,
1793+
REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
1794+
bio->bi_end_io = journal_write_endio;
1795+
bio->bi_private = ca;
1796+
closure_bio_submit(bio, cl);
1797+
}
1798+
1799+
continue_at(cl, journal_write_submit, j->wq);
1800+
} else {
1801+
/*
1802+
* no need to punt to another work item if we're not waiting on
1803+
* preflushes
1804+
*/
1805+
journal_write_submit(&cl->work);
1806+
}
1807+
}
1808+
17711809
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
17721810
{
17731811
struct bch_fs *c = container_of(j, struct bch_fs, journal);
@@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
20332071
goto err;
20342072

20352073
if (!JSET_NO_FLUSH(w->data))
2036-
closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
2037-
2038-
if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
2039-
for_each_rw_member(c, ca) {
2040-
percpu_ref_get(&ca->io_ref);
2041-
2042-
struct journal_device *ja = &ca->journal;
2043-
struct bio *bio = &ja->bio[w->idx]->bio;
2044-
bio_reset(bio, ca->disk_sb.bdev,
2045-
REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
2046-
bio->bi_end_io = journal_write_endio;
2047-
bio->bi_private = ca;
2048-
closure_bio_submit(bio, cl);
2049-
}
2050-
}
2051-
2052-
continue_at(cl, do_journal_write, j->wq);
2074+
continue_at(cl, journal_write_preflush, j->wq);
2075+
else
2076+
continue_at(cl, journal_write_submit, j->wq);
20532077
return;
20542078
no_io:
20552079
continue_at(cl, journal_write_done, j->wq);

0 commit comments

Comments
 (0)