Commit 01d550f

Merge tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
 "Pretty quiet round this time around. This contains:

   - NVMe updates via Keith:
       - nvme fabrics spec updates (Guixin, Max)
       - nvme target updates (Guixin, Evan)
       - nvme attribute refactoring (Daniel)
       - nvme-fc numa fix (Keith)

   - MD updates via Song:
       - Fix/Cleanup RCU usage from conf->disks[i].rdev (Yu Kuai)
       - Fix raid5 hang issue (Junxiao Bi)
       - Add Yu Kuai as Reviewer of the md subsystem
       - Remove deprecated flavors (Song Liu)
       - raid1 read error check support (Li Nan)
       - Better handle events off-by-1 case (Alex Lyakas)

   - Efficiency improvements for passthrough (Kundan)

   - Support for mapping integrity data directly (Keith)

   - Zoned write fix (Damien)

   - rnbd fixes (Kees, Santosh, Supriti)

   - Default to a sane discard size granularity (Christoph)

   - Make the default max transfer size naming less confusing (Christoph)

   - Remove support for deprecated host aware zoned model (Christoph)

   - Misc fixes (me, Li, Matthew, Min, Ming, Randy, liyouhong, Daniel,
     Bart, Christoph)"

* tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux: (78 commits)
  block: Treat sequential write preferred zone type as invalid
  block: remove disk_clear_zoned
  sd: remove the !ZBC && blk_queue_is_zoned case in sd_read_block_characteristics
  drivers/block/xen-blkback/common.h: Fix spelling typo in comment
  blk-cgroup: fix rcu lockdep warning in blkg_lookup()
  blk-cgroup: don't use removal safe list iterators
  block: floor the discard granularity to the physical block size
  mtd_blkdevs: use the default discard granularity
  bcache: use the default discard granularity
  zram: use the default discard granularity
  null_blk: use the default discard granularity
  nbd: use the default discard granularity
  ubd: use the default discard granularity
  block: default the discard granularity to sector size
  bcache: discard_granularity should not be smaller than a sector
  block: remove two comments in bio_split_discard
  block: rename and document BLK_DEF_MAX_SECTORS
  loop: don't abuse BLK_DEF_MAX_SECTORS
  aoe: don't abuse BLK_DEF_MAX_SECTORS
  null_blk: don't cap max_hw_sectors to BLK_DEF_MAX_SECTORS
  ...
2 parents d05e626 + 587371e commit 01d550f

79 files changed

Lines changed: 1254 additions & 2660 deletions


MAINTAINERS

Lines changed: 1 addition & 0 deletions
@@ -20079,6 +20079,7 @@ F:	include/linux/property.h
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Song Liu <song@kernel.org>
+R:	Yu Kuai <yukuai3@huawei.com>
 L:	linux-raid@vger.kernel.org
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-raid/list/

arch/um/drivers/ubd_kern.c

Lines changed: 0 additions & 1 deletion
@@ -798,7 +798,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
                 ubd_dev->cow.fd = err;
         }
         if (ubd_dev->no_trim == 0) {
-                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
                 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
                 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
         }

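The removed line relies on the series called out in the pull message ("block: default the discard granularity to sector size" and "block: floor the discard granularity to the physical block size"): drivers no longer need to set limits.discard_granularity themselves. As a rough sketch of what that means for a driver (example_setup_discard is a made-up helper, not code from this commit), discard configuration can now be reduced to capping the maximum discard size:

/* Illustrative only: discard setup after this series, assuming the block
 * core now supplies a sane default discard_granularity. */
static void example_setup_discard(struct request_queue *q,
                                  unsigned int max_sectors)
{
        /* no explicit q->limits.discard_granularity assignment needed */
        blk_queue_max_discard_sectors(q, max_sectors);
}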
block/bio-integrity.c

Lines changed: 216 additions & 2 deletions
@@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 
         memset(bip, 0, sizeof(*bip));
 
+        /* always report as many vecs as asked explicitly, not inline vecs */
+        bip->bip_max_vcnt = nr_vecs;
         if (nr_vecs > inline_vecs) {
-                bip->bip_max_vcnt = nr_vecs;
                 bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
                                           &bip->bip_max_vcnt, gfp_mask);
                 if (!bip->bip_vec)
                         goto err;
         } else {
                 bip->bip_vec = bip->bip_inline_vecs;
-                bip->bip_max_vcnt = inline_vecs;
         }
 
         bip->bip_bio = bio;
@@ -91,6 +91,47 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
 
+static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
+                                     bool dirty)
+{
+        int i;
+
+        for (i = 0; i < nr_vecs; i++) {
+                if (dirty && !PageCompound(bv[i].bv_page))
+                        set_page_dirty_lock(bv[i].bv_page);
+                unpin_user_page(bv[i].bv_page);
+        }
+}
+
+static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
+{
+        unsigned short nr_vecs = bip->bip_max_vcnt - 1;
+        struct bio_vec *copy = &bip->bip_vec[1];
+        size_t bytes = bip->bip_iter.bi_size;
+        struct iov_iter iter;
+        int ret;
+
+        iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
+        ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
+        WARN_ON_ONCE(ret != bytes);
+
+        bio_integrity_unpin_bvec(copy, nr_vecs, true);
+}
+
+static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
+{
+        bool dirty = bio_data_dir(bip->bip_bio) == READ;
+
+        if (bip->bip_flags & BIP_COPY_USER) {
+                if (dirty)
+                        bio_integrity_uncopy_user(bip);
+                kfree(bvec_virt(bip->bip_vec));
+                return;
+        }
+
+        bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
+}
+
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:        bio containing bip to be freed
@@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)
 
         if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
                 kfree(bvec_virt(bip->bip_vec));
+        else if (bip->bip_flags & BIP_INTEGRITY_USER)
+                bio_integrity_unmap_user(bip);
 
         __bio_integrity_free(bs, bip);
         bio->bi_integrity = NULL;
@@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
+static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
+                                   int nr_vecs, unsigned int len,
+                                   unsigned int direction, u32 seed)
+{
+        bool write = direction == ITER_SOURCE;
+        struct bio_integrity_payload *bip;
+        struct iov_iter iter;
+        void *buf;
+        int ret;
+
+        buf = kmalloc(len, GFP_KERNEL);
+        if (!buf)
+                return -ENOMEM;
+
+        if (write) {
+                iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
+                if (!copy_from_iter_full(buf, len, &iter)) {
+                        ret = -EFAULT;
+                        goto free_buf;
+                }
+
+                bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+        } else {
+                memset(buf, 0, len);
+
+                /*
+                 * We need to preserve the original bvec and the number of vecs
+                 * in it for completion handling
+                 */
+                bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
+        }
+
+        if (IS_ERR(bip)) {
+                ret = PTR_ERR(bip);
+                goto free_buf;
+        }
+
+        if (write)
+                bio_integrity_unpin_bvec(bvec, nr_vecs, false);
+        else
+                memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
+
+        ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
+                                     offset_in_page(buf));
+        if (ret != len) {
+                ret = -ENOMEM;
+                goto free_bip;
+        }
+
+        bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
+        bip->bip_iter.bi_sector = seed;
+        return 0;
+free_bip:
+        bio_integrity_free(bio);
+free_buf:
+        kfree(buf);
+        return ret;
+}
+
+static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
+                                   int nr_vecs, unsigned int len, u32 seed)
+{
+        struct bio_integrity_payload *bip;
+
+        bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
+        if (IS_ERR(bip))
+                return PTR_ERR(bip);
+
+        memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
+        bip->bip_flags |= BIP_INTEGRITY_USER;
+        bip->bip_iter.bi_sector = seed;
+        bip->bip_iter.bi_size = len;
+        return 0;
+}
+
+static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
+                                    int nr_vecs, ssize_t bytes, ssize_t offset)
+{
+        unsigned int nr_bvecs = 0;
+        int i, j;
+
+        for (i = 0; i < nr_vecs; i = j) {
+                size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
+                struct folio *folio = page_folio(pages[i]);
+
+                bytes -= size;
+                for (j = i + 1; j < nr_vecs; j++) {
+                        size_t next = min_t(size_t, PAGE_SIZE, bytes);
+
+                        if (page_folio(pages[j]) != folio ||
+                            pages[j] != pages[j - 1] + 1)
+                                break;
+                        unpin_user_page(pages[j]);
+                        size += next;
+                        bytes -= next;
+                }
+
+                bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
+                offset = 0;
+                nr_bvecs++;
+        }
+
+        return nr_bvecs;
+}
+
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
+                           u32 seed)
+{
+        struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+        unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
+        struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
+        struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
+        unsigned int direction, nr_bvecs;
+        struct iov_iter iter;
+        int ret, nr_vecs;
+        size_t offset;
+        bool copy;
+
+        if (bio_integrity(bio))
+                return -EINVAL;
+        if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
+                return -E2BIG;
+
+        if (bio_data_dir(bio) == READ)
+                direction = ITER_DEST;
+        else
+                direction = ITER_SOURCE;
+
+        iov_iter_ubuf(&iter, direction, ubuf, bytes);
+        nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
+        if (nr_vecs > BIO_MAX_VECS)
+                return -E2BIG;
+        if (nr_vecs > UIO_FASTIOV) {
+                bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
+                if (!bvec)
+                        return -ENOMEM;
+                pages = NULL;
+        }
+
+        copy = !iov_iter_is_aligned(&iter, align, align);
+        ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
+        if (unlikely(ret < 0))
+                goto free_bvec;
+
+        nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
+        if (pages != stack_pages)
+                kvfree(pages);
+        if (nr_bvecs > queue_max_integrity_segments(q))
+                copy = true;
+
+        if (copy)
+                ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
+                                              direction, seed);
+        else
+                ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
+        if (ret)
+                goto release_pages;
+        if (bvec != stack_vec)
+                kfree(bvec);
+
+        return 0;
+
+release_pages:
+        bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
+free_bvec:
+        if (bvec != stack_vec)
+                kfree(bvec);
+        return ret;
+}
+EXPORT_SYMBOL_GPL(bio_integrity_map_user);
+
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:        bio to generate/verify integrity metadata for

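The new bio_integrity_map_user() above pins a user-space buffer (or bounce-copies it when the buffer is misaligned or exceeds the queue's integrity segment limit) and attaches it to a bio as its integrity payload; teardown happens automatically through bio_integrity_free() on bio completion via bio_integrity_unmap_user(). A minimal, hedged usage sketch follows; attach_user_pi() and its parameter names are hypothetical and only illustrate the calling convention, not how any in-tree driver wires this up:

/* Hypothetical caller: attach user-provided protection information to a
 * bio about to be submitted, e.g. from a passthrough ioctl path. */
static int attach_user_pi(struct bio *bio, void __user *meta_buf,
                          ssize_t meta_len, u32 meta_seed)
{
        /* returns -EINVAL if the bio already carries an integrity payload,
         * -E2BIG if the buffer exceeds the queue's limits */
        return bio_integrity_map_user(bio, meta_buf, meta_len, meta_seed);
}

Because the pinned or copied pages are released from the bio's completion path, the caller does not need to unmap anything explicitly.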
block/bio.c

Lines changed: 29 additions & 24 deletions
@@ -944,7 +944,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
 
         if ((addr1 | mask) != (addr2 | mask))
                 return false;
-        if (bv->bv_len + len > queue_max_segment_size(q))
+        if (len > queue_max_segment_size(q) - bv->bv_len)
                 return false;
         return bvec_try_merge_page(bv, page, len, offset, same_page);
 }
@@ -966,10 +966,13 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
                 struct page *page, unsigned int len, unsigned int offset,
                 unsigned int max_sectors, bool *same_page)
 {
+        unsigned int max_size = max_sectors << SECTOR_SHIFT;
+
         if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
                 return 0;
 
-        if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
+        len = min3(len, max_size, queue_max_segment_size(q));
+        if (len > max_size - bio->bi_iter.bi_size)
                 return 0;
 
         if (bio->bi_vcnt > 0) {
@@ -1145,13 +1148,22 @@ EXPORT_SYMBOL(bio_add_folio);
 
 void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {
-        struct bvec_iter_all iter_all;
-        struct bio_vec *bvec;
+        struct folio_iter fi;
 
-        bio_for_each_segment_all(bvec, bio, iter_all) {
-                if (mark_dirty && !PageCompound(bvec->bv_page))
-                        set_page_dirty_lock(bvec->bv_page);
-                bio_release_page(bio, bvec->bv_page);
+        bio_for_each_folio_all(fi, bio) {
+                struct page *page;
+                size_t done = 0;
+
+                if (mark_dirty) {
+                        folio_lock(fi.folio);
+                        folio_mark_dirty(fi.folio);
+                        folio_unlock(fi.folio);
+                }
+                page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+                do {
+                        bio_release_page(bio, page++);
+                        done += PAGE_SIZE;
+                } while (done < fi.length);
         }
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1439,18 +1451,12 @@ EXPORT_SYMBOL(bio_free_pages);
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
  * for performing direct-IO in BIOs.
  *
- * The problem is that we cannot run set_page_dirty() from interrupt context
+ * The problem is that we cannot run folio_mark_dirty() from interrupt context
  * because the required locks are not interrupt-safe. So what we can do is to
 * mark the pages dirty _before_ performing IO. And in interrupt context,
 * check that the pages are still dirty. If so, fine. If not, redirty them
 * in process context.
 *
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
-*
 * Note that this code is very hard to test under normal circumstances because
 * direct-io pins the pages with get_user_pages(). This makes
 * is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1466,12 +1472,12 @@ EXPORT_SYMBOL(bio_free_pages);
 */
 void bio_set_pages_dirty(struct bio *bio)
 {
-        struct bio_vec *bvec;
-        struct bvec_iter_all iter_all;
+        struct folio_iter fi;
 
-        bio_for_each_segment_all(bvec, bio, iter_all) {
-                if (!PageCompound(bvec->bv_page))
-                        set_page_dirty_lock(bvec->bv_page);
+        bio_for_each_folio_all(fi, bio) {
+                folio_lock(fi.folio);
+                folio_mark_dirty(fi.folio);
+                folio_unlock(fi.folio);
         }
 }
 EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
@@ -1515,12 +1521,11 @@ static void bio_dirty_fn(struct work_struct *work)
 
 void bio_check_pages_dirty(struct bio *bio)
 {
-        struct bio_vec *bvec;
+        struct folio_iter fi;
         unsigned long flags;
-        struct bvec_iter_all iter_all;
 
-        bio_for_each_segment_all(bvec, bio, iter_all) {
-                if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+        bio_for_each_folio_all(fi, bio) {
+                if (!folio_test_dirty(fi.folio))
                         goto defer;
         }
 

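The bio.c hunks above convert per-page bvec walks (bio_for_each_segment_all) into folio-based iteration (bio_for_each_folio_all), which also drops the old PageCompound() special-casing since dirtying is done at folio granularity. A minimal sketch of the new iteration pattern, with bio_count_folios() being a made-up helper for illustration only:

/* Illustrative only: walk a completed bio folio by folio. */
static unsigned int bio_count_folios(struct bio *bio)
{
        struct folio_iter fi;
        unsigned int nr = 0;

        /* each fi covers one contiguous byte range within a single folio */
        bio_for_each_folio_all(fi, bio)
                nr++;
        return nr;
}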