Skip to content

Commit 7a18c0f

Browse files
committed
Merge tag 'ovl-vfs-6.8' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs
Pull backing file updates from Amir Goldstein:

These patches essentially just lift some overlayfs code to common code. The motivation is to reuse the common stacking code for the FUSE passthrough patches that I am shaping up for upstream. The FUSE passthrough work will be coming in over the next cycles. I have been testing those patches with my fuse-backing-fd development branch for quite some time, and I think both you and Miklos gave a conceptual ACK to some version of this work.

* tag 'ovl-vfs-6.8' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
  fs: factor out backing_file_mmap() helper
  fs: factor out backing_file_splice_{read,write}() helpers
  fs: factor out backing_file_{read,write}_iter() helpers
  fs: prepare for stackable filesystems backing file helpers

Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents d9e5d31 + f567377 commit 7a18c0f

11 files changed

Lines changed: 435 additions & 263 deletions

File tree

MAINTAINERS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8186,6 +8186,15 @@ S: Supported
81868186
F: fs/iomap/
81878187
F: include/linux/iomap.h
81888188

8189+
FILESYSTEMS [STACKABLE]
8190+
M: Miklos Szeredi <miklos@szeredi.hu>
8191+
M: Amir Goldstein <amir73il@gmail.com>
8192+
L: linux-fsdevel@vger.kernel.org
8193+
L: linux-unionfs@vger.kernel.org
8194+
S: Maintained
8195+
F: fs/backing-file.c
8196+
F: include/linux/backing-file.h
8197+
81898198
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
81908199
M: Riku Voipio <riku.voipio@iki.fi>
81918200
L: linux-hwmon@vger.kernel.org

fs/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER
1818
config FS_IOMAP
1919
bool
2020

21+
# Stackable filesystems
22+
config FS_STACK
23+
bool
24+
2125
config BUFFER_HEAD
2226
bool
2327

fs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
3939
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
4040
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
4141

42+
obj-$(CONFIG_FS_STACK) += backing-file.o
4243
obj-$(CONFIG_FS_MBCACHE) += mbcache.o
4344
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
4445
obj-$(CONFIG_NFS_COMMON) += nfs_common/

fs/backing-file.c

Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Common helpers for stackable filesystems and backing files.
4+
*
5+
* Forked from fs/overlayfs/file.c.
6+
*
7+
* Copyright (C) 2017 Red Hat, Inc.
8+
* Copyright (C) 2023 CTERA Networks.
9+
*/
10+
11+
#include <linux/fs.h>
12+
#include <linux/backing-file.h>
13+
#include <linux/splice.h>
14+
#include <linux/mm.h>
15+
16+
#include "internal.h"
17+
18+
/**
19+
* backing_file_open - open a backing file for kernel internal use
20+
* @user_path: path that the user reuqested to open
21+
* @flags: open flags
22+
* @real_path: path of the backing file
23+
* @cred: credentials for open
24+
*
25+
* Open a backing file for a stackable filesystem (e.g., overlayfs).
26+
* @user_path may be on the stackable filesystem and @real_path on the
27+
* underlying filesystem. In this case, we want to be able to return the
28+
* @user_path of the stackable filesystem. This is done by embedding the
29+
* returned file into a container structure that also stores the stacked
30+
* file's path, which can be retrieved using backing_file_user_path().
31+
*/
32+
struct file *backing_file_open(const struct path *user_path, int flags,
33+
const struct path *real_path,
34+
const struct cred *cred)
35+
{
36+
struct file *f;
37+
int error;
38+
39+
f = alloc_empty_backing_file(flags, cred);
40+
if (IS_ERR(f))
41+
return f;
42+
43+
path_get(user_path);
44+
*backing_file_user_path(f) = *user_path;
45+
error = vfs_open(real_path, f);
46+
if (error) {
47+
fput(f);
48+
f = ERR_PTR(error);
49+
}
50+
51+
return f;
52+
}
53+
EXPORT_SYMBOL_GPL(backing_file_open);
54+
55+
struct backing_aio {
56+
struct kiocb iocb;
57+
refcount_t ref;
58+
struct kiocb *orig_iocb;
59+
/* used for aio completion */
60+
void (*end_write)(struct file *);
61+
struct work_struct work;
62+
long res;
63+
};
64+
65+
static struct kmem_cache *backing_aio_cachep;
66+
67+
#define BACKING_IOCB_MASK \
68+
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
69+
70+
static rwf_t iocb_to_rw_flags(int flags)
71+
{
72+
return (__force rwf_t)(flags & BACKING_IOCB_MASK);
73+
}
74+
75+
static void backing_aio_put(struct backing_aio *aio)
76+
{
77+
if (refcount_dec_and_test(&aio->ref)) {
78+
fput(aio->iocb.ki_filp);
79+
kmem_cache_free(backing_aio_cachep, aio);
80+
}
81+
}
82+
83+
static void backing_aio_cleanup(struct backing_aio *aio, long res)
84+
{
85+
struct kiocb *iocb = &aio->iocb;
86+
struct kiocb *orig_iocb = aio->orig_iocb;
87+
88+
if (aio->end_write)
89+
aio->end_write(orig_iocb->ki_filp);
90+
91+
orig_iocb->ki_pos = iocb->ki_pos;
92+
backing_aio_put(aio);
93+
}
94+
95+
static void backing_aio_rw_complete(struct kiocb *iocb, long res)
96+
{
97+
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
98+
struct kiocb *orig_iocb = aio->orig_iocb;
99+
100+
if (iocb->ki_flags & IOCB_WRITE)
101+
kiocb_end_write(iocb);
102+
103+
backing_aio_cleanup(aio, res);
104+
orig_iocb->ki_complete(orig_iocb, res);
105+
}
106+
107+
static void backing_aio_complete_work(struct work_struct *work)
108+
{
109+
struct backing_aio *aio = container_of(work, struct backing_aio, work);
110+
111+
backing_aio_rw_complete(&aio->iocb, aio->res);
112+
}
113+
114+
static void backing_aio_queue_completion(struct kiocb *iocb, long res)
115+
{
116+
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
117+
118+
/*
119+
* Punt to a work queue to serialize updates of mtime/size.
120+
*/
121+
aio->res = res;
122+
INIT_WORK(&aio->work, backing_aio_complete_work);
123+
queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
124+
&aio->work);
125+
}
126+
127+
static int backing_aio_init_wq(struct kiocb *iocb)
128+
{
129+
struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
130+
131+
if (sb->s_dio_done_wq)
132+
return 0;
133+
134+
return sb_init_dio_done_wq(sb);
135+
}
136+
137+
138+
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
139+
struct kiocb *iocb, int flags,
140+
struct backing_file_ctx *ctx)
141+
{
142+
struct backing_aio *aio = NULL;
143+
const struct cred *old_cred;
144+
ssize_t ret;
145+
146+
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
147+
return -EIO;
148+
149+
if (!iov_iter_count(iter))
150+
return 0;
151+
152+
if (iocb->ki_flags & IOCB_DIRECT &&
153+
!(file->f_mode & FMODE_CAN_ODIRECT))
154+
return -EINVAL;
155+
156+
old_cred = override_creds(ctx->cred);
157+
if (is_sync_kiocb(iocb)) {
158+
rwf_t rwf = iocb_to_rw_flags(flags);
159+
160+
ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
161+
} else {
162+
ret = -ENOMEM;
163+
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
164+
if (!aio)
165+
goto out;
166+
167+
aio->orig_iocb = iocb;
168+
kiocb_clone(&aio->iocb, iocb, get_file(file));
169+
aio->iocb.ki_complete = backing_aio_rw_complete;
170+
refcount_set(&aio->ref, 2);
171+
ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
172+
backing_aio_put(aio);
173+
if (ret != -EIOCBQUEUED)
174+
backing_aio_cleanup(aio, ret);
175+
}
176+
out:
177+
revert_creds(old_cred);
178+
179+
if (ctx->accessed)
180+
ctx->accessed(ctx->user_file);
181+
182+
return ret;
183+
}
184+
EXPORT_SYMBOL_GPL(backing_file_read_iter);
185+
186+
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
187+
struct kiocb *iocb, int flags,
188+
struct backing_file_ctx *ctx)
189+
{
190+
const struct cred *old_cred;
191+
ssize_t ret;
192+
193+
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
194+
return -EIO;
195+
196+
if (!iov_iter_count(iter))
197+
return 0;
198+
199+
ret = file_remove_privs(ctx->user_file);
200+
if (ret)
201+
return ret;
202+
203+
if (iocb->ki_flags & IOCB_DIRECT &&
204+
!(file->f_mode & FMODE_CAN_ODIRECT))
205+
return -EINVAL;
206+
207+
/*
208+
* Stacked filesystems don't support deferred completions, don't copy
209+
* this property in case it is set by the issuer.
210+
*/
211+
flags &= ~IOCB_DIO_CALLER_COMP;
212+
213+
old_cred = override_creds(ctx->cred);
214+
if (is_sync_kiocb(iocb)) {
215+
rwf_t rwf = iocb_to_rw_flags(flags);
216+
217+
ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
218+
if (ctx->end_write)
219+
ctx->end_write(ctx->user_file);
220+
} else {
221+
struct backing_aio *aio;
222+
223+
ret = backing_aio_init_wq(iocb);
224+
if (ret)
225+
goto out;
226+
227+
ret = -ENOMEM;
228+
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
229+
if (!aio)
230+
goto out;
231+
232+
aio->orig_iocb = iocb;
233+
aio->end_write = ctx->end_write;
234+
kiocb_clone(&aio->iocb, iocb, get_file(file));
235+
aio->iocb.ki_flags = flags;
236+
aio->iocb.ki_complete = backing_aio_queue_completion;
237+
refcount_set(&aio->ref, 2);
238+
ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
239+
backing_aio_put(aio);
240+
if (ret != -EIOCBQUEUED)
241+
backing_aio_cleanup(aio, ret);
242+
}
243+
out:
244+
revert_creds(old_cred);
245+
246+
return ret;
247+
}
248+
EXPORT_SYMBOL_GPL(backing_file_write_iter);
249+
250+
/*
 * backing_file_splice_read - splice data from a backing file into a pipe
 * @in:    backing file to read from; must have FMODE_BACKING set
 * @ppos:  file position, passed through to vfs_splice_read()
 * @pipe:  destination pipe
 * @len:   number of bytes requested
 * @flags: splice flags
 * @ctx:   credentials to read with, plus the user file and optional
 *         ->accessed() hook
 *
 * Returns the result of vfs_splice_read(), or -EIO if @in is not a
 * backing file.
 */
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	const struct cred *saved_cred;
	ssize_t spliced;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	/* Perform the read with the credentials supplied by the caller. */
	saved_cred = override_creds(ctx->cred);
	spliced = vfs_splice_read(in, ppos, pipe, len, flags);
	revert_creds(saved_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return spliced;
}
270+
EXPORT_SYMBOL_GPL(backing_file_splice_read);
271+
272+
/*
 * backing_file_splice_write - splice data from a pipe into a backing file
 * @pipe:  source pipe
 * @out:   backing file to write to; must have FMODE_BACKING set
 * @ppos:  file position, passed through to iter_file_splice_write()
 * @len:   number of bytes requested
 * @flags: splice flags
 * @ctx:   credentials to write with, plus the user file and optional
 *         ->end_write() hook
 *
 * Privileges are removed from @ctx->user_file before anything is written.
 *
 * Returns the result of iter_file_splice_write(), -EIO if @out is not a
 * backing file, or the error from file_remove_privs().
 */
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, loff_t *ppos, size_t len,
				  unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	const struct cred *saved_cred;
	ssize_t spliced;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	spliced = file_remove_privs(ctx->user_file);
	if (spliced)
		return spliced;

	/* Perform the write with the credentials supplied by the caller. */
	saved_cred = override_creds(ctx->cred);
	file_start_write(out);
	spliced = iter_file_splice_write(pipe, out, ppos, len, flags);
	file_end_write(out);
	revert_creds(saved_cred);

	if (ctx->end_write)
		ctx->end_write(ctx->user_file);

	return spliced;
}
298+
EXPORT_SYMBOL_GPL(backing_file_splice_write);
299+
300+
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
301+
struct backing_file_ctx *ctx)
302+
{
303+
const struct cred *old_cred;
304+
int ret;
305+
306+
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
307+
WARN_ON_ONCE(ctx->user_file != vma->vm_file))
308+
return -EIO;
309+
310+
if (!file->f_op->mmap)
311+
return -ENODEV;
312+
313+
vma_set_file(vma, file);
314+
315+
old_cred = override_creds(ctx->cred);
316+
ret = call_mmap(vma->vm_file, vma);
317+
revert_creds(old_cred);
318+
319+
if (ctx->accessed)
320+
ctx->accessed(ctx->user_file);
321+
322+
return ret;
323+
}
324+
EXPORT_SYMBOL_GPL(backing_file_mmap);
325+
326+
/*
 * Create the slab cache used for struct backing_aio allocations.
 *
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
static int __init backing_aio_init(void)
{
	backing_aio_cachep = kmem_cache_create("backing_aio",
					       sizeof(struct backing_aio),
					       0, SLAB_HWCACHE_ALIGN, NULL);

	return backing_aio_cachep ? 0 : -ENOMEM;
}
336+
fs_initcall(backing_aio_init);

0 commit comments

Comments
 (0)