Skip to content

Commit 5076a60

Browse files
author
Darrick J. Wong
committed
xfs: support in-memory buffer cache targets
Allow the buffer cache to target in-memory files by making it possible to have a buftarg that maps pages from private shmem files. As the previous patch alludes, the in-memory buftarg contains its own cache, points to a shmem file, and does not point to a block_device. The next few patches will make it possible to construct an xfs_btree in pageable memory by using this buftarg. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent e7b58f7 commit 5076a60

8 files changed

Lines changed: 369 additions & 46 deletions

File tree

fs/xfs/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,17 @@ config XFS_LIVE_HOOKS
128128
bool
129129
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
130130

131+
config XFS_MEMORY_BUFS
132+
bool
133+
131134
config XFS_ONLINE_SCRUB
132135
bool "XFS online metadata check support"
133136
default n
134137
depends on XFS_FS
135138
depends on TMPFS && SHMEM
136139
select XFS_LIVE_HOOKS
137140
select XFS_DRAIN_INTENTS
141+
select XFS_MEMORY_BUFS
138142
help
139143
If you say Y here you will be able to check metadata on a
140144
mounted XFS filesystem. This feature is intended to reduce

fs/xfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ endif
137137

138138
xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o
139139
xfs-$(CONFIG_XFS_LIVE_HOOKS) += xfs_hooks.o
140+
xfs-$(CONFIG_XFS_MEMORY_BUFS) += xfs_buf_mem.o
140141

141142
# online scrub/repair
142143
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)

fs/xfs/xfs_buf.c

Lines changed: 86 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "xfs_errortag.h"
2222
#include "xfs_error.h"
2323
#include "xfs_ag.h"
24+
#include "xfs_buf_mem.h"
2425

2526
struct kmem_cache *xfs_buf_cache;
2627

@@ -318,7 +319,9 @@ xfs_buf_free(
318319

319320
ASSERT(list_empty(&bp->b_lru));
320321

321-
if (bp->b_flags & _XBF_PAGES)
322+
if (xfs_buftarg_is_mem(bp->b_target))
323+
xmbuf_unmap_page(bp);
324+
else if (bp->b_flags & _XBF_PAGES)
322325
xfs_buf_free_pages(bp);
323326
else if (bp->b_flags & _XBF_KMEM)
324327
kfree(bp->b_addr);
@@ -634,18 +637,20 @@ xfs_buf_find_insert(
634637
if (error)
635638
goto out_drop_pag;
636639

637-
/*
638-
* For buffers that fit entirely within a single page, first attempt to
639-
* allocate the memory from the heap to minimise memory usage. If we
640-
* can't get heap memory for these small buffers, we fall back to using
641-
* the page allocator.
642-
*/
643-
if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
644-
xfs_buf_alloc_kmem(new_bp, flags) < 0) {
640+
if (xfs_buftarg_is_mem(new_bp->b_target)) {
641+
error = xmbuf_map_page(new_bp);
642+
} else if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
643+
xfs_buf_alloc_kmem(new_bp, flags) < 0) {
644+
/*
645+
* For buffers that fit entirely within a single page, first
646+
* attempt to allocate the memory from the heap to minimise
647+
* memory usage. If we can't get heap memory for these small
648+
* buffers, we fall back to using the page allocator.
649+
*/
645650
error = xfs_buf_alloc_pages(new_bp, flags);
646-
if (error)
647-
goto out_free_buf;
648651
}
652+
if (error)
653+
goto out_free_buf;
649654

650655
spin_lock(&bch->bc_lock);
651656
bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
@@ -688,6 +693,8 @@ xfs_buftarg_get_pag(
688693
{
689694
struct xfs_mount *mp = btp->bt_mount;
690695

696+
if (xfs_buftarg_is_mem(btp))
697+
return NULL;
691698
return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
692699
}
693700

@@ -696,7 +703,9 @@ xfs_buftarg_buf_cache(
696703
struct xfs_buftarg *btp,
697704
struct xfs_perag *pag)
698705
{
699-
return &pag->pag_bcache;
706+
if (pag)
707+
return &pag->pag_bcache;
708+
return btp->bt_cache;
700709
}
701710

702711
/*
@@ -926,6 +935,13 @@ xfs_buf_readahead_map(
926935
{
927936
struct xfs_buf *bp;
928937

938+
/*
939+
* Currently we don't have a good means or justification for performing
940+
* xmbuf_map_page asynchronously, so we don't do readahead.
941+
*/
942+
if (xfs_buftarg_is_mem(target))
943+
return;
944+
929945
xfs_buf_read_map(target, map, nmaps,
930946
XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
931947
__this_address);
@@ -991,7 +1007,10 @@ xfs_buf_get_uncached(
9911007
if (error)
9921008
return error;
9931009

994-
error = xfs_buf_alloc_pages(bp, flags);
1010+
if (xfs_buftarg_is_mem(bp->b_target))
1011+
error = xmbuf_map_page(bp);
1012+
else
1013+
error = xfs_buf_alloc_pages(bp, flags);
9951014
if (error)
9961015
goto fail_free_buf;
9971016

@@ -1633,6 +1652,12 @@ _xfs_buf_ioapply(
16331652
/* we only use the buffer cache for meta-data */
16341653
op |= REQ_META;
16351654

1655+
/* in-memory targets are directly mapped, no IO required. */
1656+
if (xfs_buftarg_is_mem(bp->b_target)) {
1657+
xfs_buf_ioend(bp);
1658+
return;
1659+
}
1660+
16361661
/*
16371662
* Walk all the vectors issuing IO on them. Set up the initial offset
16381663
* into the buffer and the desired IO size before we start -
@@ -1988,19 +2013,24 @@ xfs_buftarg_shrink_count(
19882013
}
19892014

19902015
void
1991-
xfs_free_buftarg(
2016+
xfs_destroy_buftarg(
19922017
struct xfs_buftarg *btp)
19932018
{
19942019
shrinker_free(btp->bt_shrinker);
19952020
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
19962021
percpu_counter_destroy(&btp->bt_io_count);
19972022
list_lru_destroy(&btp->bt_lru);
2023+
}
19982024

2025+
void
2026+
xfs_free_buftarg(
2027+
struct xfs_buftarg *btp)
2028+
{
2029+
xfs_destroy_buftarg(btp);
19992030
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
20002031
/* the main block device is closed by kill_block_super */
20012032
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
20022033
bdev_release(btp->bt_bdev_handle);
2003-
20042034
kfree(btp);
20052035
}
20062036

@@ -2023,6 +2053,45 @@ xfs_setsize_buftarg(
20232053
return 0;
20242054
}
20252055

2056+
int
2057+
xfs_init_buftarg(
2058+
struct xfs_buftarg *btp,
2059+
size_t logical_sectorsize,
2060+
const char *descr)
2061+
{
2062+
/* Set up device logical sector size mask */
2063+
btp->bt_logical_sectorsize = logical_sectorsize;
2064+
btp->bt_logical_sectormask = logical_sectorsize - 1;
2065+
2066+
/*
2067+
* Buffer IO error rate limiting. Limit it to no more than 10 messages
2068+
* per 30 seconds so as to not spam logs too much on repeated errors.
2069+
*/
2070+
ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
2071+
DEFAULT_RATELIMIT_BURST);
2072+
2073+
if (list_lru_init(&btp->bt_lru))
2074+
return -ENOMEM;
2075+
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
2076+
goto out_destroy_lru;
2077+
2078+
btp->bt_shrinker =
2079+
shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", descr);
2080+
if (!btp->bt_shrinker)
2081+
goto out_destroy_io_count;
2082+
btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
2083+
btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
2084+
btp->bt_shrinker->private_data = btp;
2085+
shrinker_register(btp->bt_shrinker);
2086+
return 0;
2087+
2088+
out_destroy_io_count:
2089+
percpu_counter_destroy(&btp->bt_io_count);
2090+
out_destroy_lru:
2091+
list_lru_destroy(&btp->bt_lru);
2092+
return -ENOMEM;
2093+
}
2094+
20262095
struct xfs_buftarg *
20272096
xfs_alloc_buftarg(
20282097
struct xfs_mount *mp,
@@ -2049,41 +2118,12 @@ xfs_alloc_buftarg(
20492118
*/
20502119
if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
20512120
goto error_free;
2052-
2053-
/* Set up device logical sector size mask */
2054-
btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
2055-
btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
2056-
2057-
/*
2058-
* Buffer IO error rate limiting. Limit it to no more than 10 messages
2059-
* per 30 seconds so as to not spam logs too much on repeated errors.
2060-
*/
2061-
ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
2062-
DEFAULT_RATELIMIT_BURST);
2063-
2064-
if (list_lru_init(&btp->bt_lru))
2121+
if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
2122+
mp->m_super->s_id))
20652123
goto error_free;
20662124

2067-
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
2068-
goto error_lru;
2069-
2070-
btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s",
2071-
mp->m_super->s_id);
2072-
if (!btp->bt_shrinker)
2073-
goto error_pcpu;
2074-
2075-
btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
2076-
btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
2077-
btp->bt_shrinker->private_data = btp;
2078-
2079-
shrinker_register(btp->bt_shrinker);
2080-
20812125
return btp;
20822126

2083-
error_pcpu:
2084-
percpu_counter_destroy(&btp->bt_io_count);
2085-
error_lru:
2086-
list_lru_destroy(&btp->bt_lru);
20872127
error_free:
20882128
kfree(btp);
20892129
return NULL;

fs/xfs/xfs_buf.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ struct xfs_buftarg {
109109
struct bdev_handle *bt_bdev_handle;
110110
struct block_device *bt_bdev;
111111
struct dax_device *bt_daxdev;
112+
struct file *bt_file;
112113
u64 bt_dax_part_off;
113114
struct xfs_mount *bt_mount;
114115
unsigned int bt_meta_sectorsize;
@@ -122,6 +123,9 @@ struct xfs_buftarg {
122123

123124
struct percpu_counter bt_io_count;
124125
struct ratelimit_state bt_ioerror_rl;
126+
127+
/* built-in cache, if we're not using the perag one */
128+
struct xfs_buf_cache bt_cache[];
125129
};
126130

127131
#define XB_PAGES 2
@@ -387,4 +391,9 @@ int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
387391
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
388392
bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
389393

394+
/* for xfs_buf_mem.c only: */
395+
int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
396+
const char *descr);
397+
void xfs_destroy_buftarg(struct xfs_buftarg *btp);
398+
390399
#endif /* __XFS_BUF_H__ */

0 commit comments

Comments
 (0)