Skip to content

Commit f963cf2

Browse files
sbashiro authored and chucklever committed
NFSD: Implement large extent array support in pNFS
When a pNFS client in block or SCSI layout mode sends a LAYOUTCOMMIT to the MDS, a variable-length array of modified extents is supplied within the request. This patch allows the server to accept such extent arrays even if they do not fit within a single memory page.

The issue can be reproduced by writing to a 1GB file using FIO with O_DIRECT, a 4K block size, and a large I/O depth, without preallocating the file. In this case, the server returns NFSERR_BADXDR to the client.

Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
1 parent 6bf1be3 commit f963cf2

7 files changed

Lines changed: 78 additions & 46 deletions

File tree

fs/nfsd/blocklayout.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
173173
}
174174

175175
static __be32
176-
nfsd4_block_proc_layoutcommit(struct inode *inode,
176+
nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
177177
struct nfsd4_layoutcommit *lcp)
178178
{
179179
struct iomap *iomaps;
180180
int nr_iomaps;
181181
__be32 nfserr;
182182

183-
nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
184-
lcp->lc_up_len, &iomaps, &nr_iomaps,
185-
i_blocksize(inode));
183+
rqstp->rq_arg = lcp->lc_up_layout;
184+
svcxdr_init_decode(rqstp);
185+
186+
nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
187+
&iomaps, &nr_iomaps, i_blocksize(inode));
186188
if (nfserr != nfs_ok)
187189
return nfserr;
188190

@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
313315
return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
314316
}
315317
static __be32
316-
nfsd4_scsi_proc_layoutcommit(struct inode *inode,
318+
nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
317319
struct nfsd4_layoutcommit *lcp)
318320
{
319321
struct iomap *iomaps;
320322
int nr_iomaps;
321323
__be32 nfserr;
322324

323-
nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
324-
lcp->lc_up_len, &iomaps, &nr_iomaps,
325-
i_blocksize(inode));
325+
rqstp->rq_arg = lcp->lc_up_layout;
326+
svcxdr_init_decode(rqstp);
327+
328+
nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
329+
&iomaps, &nr_iomaps, i_blocksize(inode));
326330
if (nfserr != nfs_ok)
327331
return nfserr;
328332

fs/nfsd/blocklayoutxdr.c

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
113113

114114
/**
115115
* nfsd4_block_decode_layoutupdate - decode the block layout extent array
116-
* @p: pointer to the xdr data
117-
* @len: number of bytes to decode
116+
* @xdr: subbuf set to the encoded array
118117
* @iomapp: pointer to store the decoded extent array
119118
* @nr_iomapsp: pointer to store the number of extents
120119
* @block_size: alignment of extent offset and length
@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
127126
*
128127
* Return values:
129128
* %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
130-
* %nfserr_bad_xdr: The encoded array in @p is invalid
129+
* %nfserr_bad_xdr: The encoded array in @xdr is invalid
131130
* %nfserr_inval: An unaligned extent found
132131
* %nfserr_delay: Failed to allocate memory for @iomapp
133132
*/
134133
__be32
135-
nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
134+
nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
136135
int *nr_iomapsp, u32 block_size)
137136
{
138137
struct iomap *iomaps;
139-
u32 nr_iomaps, i;
138+
u32 nr_iomaps, expected, len, i;
139+
__be32 nfserr;
140140

141-
if (len < sizeof(u32))
142-
return nfserr_bad_xdr;
143-
len -= sizeof(u32);
144-
if (len % PNFS_BLOCK_EXTENT_SIZE)
141+
if (xdr_stream_decode_u32(xdr, &nr_iomaps))
145142
return nfserr_bad_xdr;
146143

147-
nr_iomaps = be32_to_cpup(p++);
148-
if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
144+
len = sizeof(__be32) + xdr_stream_remaining(xdr);
145+
expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
146+
if (len != expected)
149147
return nfserr_bad_xdr;
150148

151149
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
155153
for (i = 0; i < nr_iomaps; i++) {
156154
struct pnfs_block_extent bex;
157155

158-
p = svcxdr_decode_deviceid4(p, &bex.vol_id);
159-
p = xdr_decode_hyper(p, &bex.foff);
156+
if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
157+
nfserr = nfserr_bad_xdr;
158+
goto fail;
159+
}
160+
161+
if (xdr_stream_decode_u64(xdr, &bex.foff)) {
162+
nfserr = nfserr_bad_xdr;
163+
goto fail;
164+
}
160165
if (bex.foff & (block_size - 1)) {
166+
nfserr = nfserr_inval;
167+
goto fail;
168+
}
169+
170+
if (xdr_stream_decode_u64(xdr, &bex.len)) {
171+
nfserr = nfserr_bad_xdr;
161172
goto fail;
162173
}
163-
p = xdr_decode_hyper(p, &bex.len);
164174
if (bex.len & (block_size - 1)) {
175+
nfserr = nfserr_inval;
176+
goto fail;
177+
}
178+
179+
if (xdr_stream_decode_u64(xdr, &bex.soff)) {
180+
nfserr = nfserr_bad_xdr;
165181
goto fail;
166182
}
167-
p = xdr_decode_hyper(p, &bex.soff);
168183
if (bex.soff & (block_size - 1)) {
184+
nfserr = nfserr_inval;
185+
goto fail;
186+
}
187+
188+
if (xdr_stream_decode_u32(xdr, &bex.es)) {
189+
nfserr = nfserr_bad_xdr;
169190
goto fail;
170191
}
171-
bex.es = be32_to_cpup(p++);
172192
if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
193+
nfserr = nfserr_inval;
173194
goto fail;
174195
}
175196

@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
182203
return nfs_ok;
183204
fail:
184205
kfree(iomaps);
185-
return nfserr_inval;
206+
return nfserr;
186207
}
187208

188209
/**
189210
* nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
190-
* @p: pointer to the xdr data
191-
* @len: number of bytes to decode
211+
* @xdr: subbuf set to the encoded array
192212
* @iomapp: pointer to store the decoded extent array
193213
* @nr_iomapsp: pointer to store the number of extents
194214
* @block_size: alignment of extent offset and length
@@ -200,21 +220,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
200220
*
201221
* Return values:
202222
* %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
203-
* %nfserr_bad_xdr: The encoded array in @p is invalid
223+
* %nfserr_bad_xdr: The encoded array in @xdr is invalid
204224
* %nfserr_inval: An unaligned extent found
205225
* %nfserr_delay: Failed to allocate memory for @iomapp
206226
*/
207227
__be32
208-
nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
228+
nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
209229
int *nr_iomapsp, u32 block_size)
210230
{
211231
struct iomap *iomaps;
212-
u32 nr_iomaps, expected, i;
232+
u32 nr_iomaps, expected, len, i;
233+
__be32 nfserr;
213234

214-
if (len < sizeof(u32))
235+
if (xdr_stream_decode_u32(xdr, &nr_iomaps))
215236
return nfserr_bad_xdr;
216237

217-
nr_iomaps = be32_to_cpup(p++);
238+
len = sizeof(__be32) + xdr_stream_remaining(xdr);
218239
expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
219240
if (len != expected)
220241
return nfserr_bad_xdr;
@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
226247
for (i = 0; i < nr_iomaps; i++) {
227248
u64 val;
228249

229-
p = xdr_decode_hyper(p, &val);
250+
if (xdr_stream_decode_u64(xdr, &val)) {
251+
nfserr = nfserr_bad_xdr;
252+
goto fail;
253+
}
230254
if (val & (block_size - 1)) {
255+
nfserr = nfserr_inval;
231256
goto fail;
232257
}
233258
iomaps[i].offset = val;
234259

235-
p = xdr_decode_hyper(p, &val);
260+
if (xdr_stream_decode_u64(xdr, &val)) {
261+
nfserr = nfserr_bad_xdr;
262+
goto fail;
263+
}
236264
if (val & (block_size - 1)) {
265+
nfserr = nfserr_inval;
237266
goto fail;
238267
}
239268
iomaps[i].length = val;
@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
244273
return nfs_ok;
245274
fail:
246275
kfree(iomaps);
247-
return nfserr_inval;
276+
return nfserr;
248277
}

fs/nfsd/blocklayoutxdr.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
5454
const struct nfsd4_getdeviceinfo *gdp);
5555
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
5656
const struct nfsd4_layoutget *lgp);
57-
__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
57+
__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
5858
struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
59-
__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
59+
__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
6060
struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
6161

6262
#endif /* _NFSD_BLOCKLAYOUTXDR_H */

fs/nfsd/nfs4proc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2520,7 +2520,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
25202520
lcp->lc_size_chg = false;
25212521
}
25222522

2523-
nfserr = ops->proc_layoutcommit(inode, lcp);
2523+
nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
25242524
nfs4_put_stid(&ls->ls_stid);
25252525
out:
25262526
return nfserr;

fs/nfsd/nfs4xdr.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -592,20 +592,19 @@ static __be32
592592
nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
593593
struct nfsd4_layoutcommit *lcp)
594594
{
595+
u32 len;
596+
595597
if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
596598
return nfserr_bad_xdr;
597599
if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
598600
return nfserr_bad_xdr;
599601
if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
600602
return nfserr_bad_xdr;
601603

602-
if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
604+
if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
605+
return nfserr_bad_xdr;
606+
if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
603607
return nfserr_bad_xdr;
604-
if (lcp->lc_up_len > 0) {
605-
lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
606-
if (!lcp->lc_up_layout)
607-
return nfserr_bad_xdr;
608-
}
609608

610609
return nfs_ok;
611610
}

fs/nfsd/pnfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct nfsd4_layout_ops {
3535
const struct nfsd4_layoutget *lgp);
3636

3737
__be32 (*proc_layoutcommit)(struct inode *inode,
38+
struct svc_rqst *rqstp,
3839
struct nfsd4_layoutcommit *lcp);
3940

4041
void (*fence_client)(struct nfs4_layout_stateid *ls,

fs/nfsd/xdr4.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -664,8 +664,7 @@ struct nfsd4_layoutcommit {
664664
u64 lc_last_wr; /* request */
665665
struct timespec64 lc_mtime; /* request */
666666
u32 lc_layout_type; /* request */
667-
u32 lc_up_len; /* layout length */
668-
void *lc_up_layout; /* decoded by callback */
667+
struct xdr_buf lc_up_layout; /* decoded by callback */
669668
bool lc_size_chg; /* response */
670669
u64 lc_newsize; /* response */
671670
};

0 commit comments

Comments
 (0)