Skip to content

Commit 20b1d75

Browse files
Jonathan Curley authored and Anna Schumaker committed
NFSv4/flexfiles: Add support for striped layouts
Update the lseg creation path to parse and add striped layouts, and enable support for striped layouts.

Limitation: all mirrors must have the same number of stripes.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
1 parent 8a8e0f5 commit 20b1d75

2 files changed

Lines changed: 157 additions & 92 deletions

File tree

fs/nfs/flexfilelayout/flexfilelayout.c

Lines changed: 155 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -177,18 +177,19 @@ ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
177177
#endif
178178
}
179179

180-
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
181-
const struct nfs4_ff_layout_mirror *m2)
180+
static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
181+
const struct nfs4_ff_layout_ds_stripe *dss2)
182182
{
183183
int i, j;
184184

185-
if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt)
185+
if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
186186
return false;
187-
for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) {
187+
188+
for (i = 0; i < dss1->fh_versions_cnt; i++) {
188189
bool found_fh = false;
189-
for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) {
190-
if (nfs_compare_fh(&m1->dss[0].fh_versions[i],
191-
&m2->dss[0].fh_versions[j]) == 0) {
190+
for (j = 0; j < dss2->fh_versions_cnt; j++) {
191+
if (nfs_compare_fh(&dss1->fh_versions[i],
192+
&dss2->fh_versions[j]) == 0) {
192193
found_fh = true;
193194
break;
194195
}
@@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
199200
return true;
200201
}
201202

203+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
204+
const struct nfs4_ff_layout_mirror *m2)
205+
{
206+
u32 dss_id;
207+
208+
if (m1->dss_count != m2->dss_count)
209+
return false;
210+
211+
for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
212+
if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id]))
213+
return false;
214+
215+
return true;
216+
}
217+
218+
static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
219+
const struct nfs4_ff_layout_mirror *m2)
220+
{
221+
u32 dss_id;
222+
223+
if (m1->dss_count != m2->dss_count)
224+
return false;
225+
226+
for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
227+
if (memcmp(&m1->dss[dss_id].devid,
228+
&m2->dss[dss_id].devid,
229+
sizeof(m1->dss[dss_id].devid)) != 0)
230+
return false;
231+
232+
return true;
233+
}
234+
202235
static struct nfs4_ff_layout_mirror *
203236
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
204237
struct nfs4_ff_layout_mirror *mirror)
@@ -209,8 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
209242

210243
spin_lock(&inode->i_lock);
211244
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
212-
if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid,
213-
sizeof(pos->dss[0].devid)) != 0)
245+
if (!ff_mirror_match_devid(mirror, pos))
214246
continue;
215247
if (!ff_mirror_match_fh(mirror, pos))
216248
continue;
@@ -241,31 +273,35 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
241273
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
242274
{
243275
struct nfs4_ff_layout_mirror *mirror;
276+
u32 dss_id;
244277

245278
mirror = kzalloc(sizeof(*mirror), gfp_flags);
246279
if (mirror != NULL) {
247280
spin_lock_init(&mirror->lock);
248281
refcount_set(&mirror->ref, 1);
249282
INIT_LIST_HEAD(&mirror->mirrors);
250-
nfs_localio_file_init(&mirror->dss[0].nfl);
283+
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
284+
nfs_localio_file_init(&mirror->dss[dss_id].nfl);
251285
}
252286
return mirror;
253287
}
254288

255289
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
256290
{
257291
const struct cred *cred;
258-
int dss_id = 0;
292+
u32 dss_id;
259293

260294
ff_layout_remove_mirror(mirror);
261295

262-
kfree(mirror->dss[dss_id].fh_versions);
263-
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
264-
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
265-
put_cred(cred);
266-
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
267-
put_cred(cred);
268-
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
296+
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
297+
kfree(mirror->dss[dss_id].fh_versions);
298+
cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
299+
put_cred(cred);
300+
cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
301+
put_cred(cred);
302+
nfs_close_local_fh(&mirror->dss[dss_id].nfl);
303+
nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
304+
}
269305

270306
kfree(mirror->dss);
271307
kfree(mirror);
@@ -371,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
371407
free_me);
372408
}
373409

410+
static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
411+
{
412+
u32 dss_id, sum = 0;
413+
414+
for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
415+
sum += mirror->dss[dss_id].efficiency;
416+
417+
return sum;
418+
}
419+
374420
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
375421
{
376422
int i, j;
377423

378424
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
379425
for (j = i + 1; j < fls->mirror_array_cnt; j++)
380-
if (fls->mirror_array[i]->dss[0].efficiency <
381-
fls->mirror_array[j]->dss[0].efficiency)
426+
if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
427+
ff_mirror_efficiency_sum(fls->mirror_array[j]))
382428
swap(fls->mirror_array[i],
383429
fls->mirror_array[j]);
384430
}
@@ -398,6 +444,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
398444
u32 mirror_array_cnt;
399445
__be32 *p;
400446
int i, rc;
447+
struct nfs4_ff_layout_ds_stripe *dss_info;
401448

402449
dprintk("--> %s\n", __func__);
403450
scratch = folio_alloc(gfp_flags, 0);
@@ -440,17 +487,24 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
440487
kuid_t uid;
441488
kgid_t gid;
442489
u32 fh_count, id;
443-
int j, dss_id = 0;
490+
int j, dss_id;
444491

445492
rc = -EIO;
446493
p = xdr_inline_decode(&stream, 4);
447494
if (!p)
448495
goto out_err_free;
449496

450-
dss_count = be32_to_cpup(p);
497+
// Ensure all mirrors have same stripe count.
498+
if (dss_count == 0)
499+
dss_count = be32_to_cpup(p);
500+
else if (dss_count != be32_to_cpup(p))
501+
goto out_err_free;
502+
503+
if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
504+
dss_count == 0)
505+
goto out_err_free;
451506

452-
/* FIXME: allow for striping? */
453-
if (dss_count != 1)
507+
if (dss_count > 1 && stripe_unit == 0)
454508
goto out_err_free;
455509

456510
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
@@ -464,91 +518,100 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
464518
kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
465519
gfp_flags);
466520

467-
/* deviceid */
468-
rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid);
469-
if (rc)
470-
goto out_err_free;
521+
for (dss_id = 0; dss_id < dss_count; dss_id++) {
522+
dss_info = &fls->mirror_array[i]->dss[dss_id];
523+
dss_info->mirror = fls->mirror_array[i];
471524

472-
/* efficiency */
473-
rc = -EIO;
474-
p = xdr_inline_decode(&stream, 4);
475-
if (!p)
476-
goto out_err_free;
477-
fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p);
525+
/* deviceid */
526+
rc = decode_deviceid(&stream, &dss_info->devid);
527+
if (rc)
528+
goto out_err_free;
478529

479-
/* stateid */
480-
rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid);
481-
if (rc)
482-
goto out_err_free;
530+
/* efficiency */
531+
rc = -EIO;
532+
p = xdr_inline_decode(&stream, 4);
533+
if (!p)
534+
goto out_err_free;
535+
dss_info->efficiency = be32_to_cpup(p);
483536

484-
/* fh */
485-
rc = -EIO;
486-
p = xdr_inline_decode(&stream, 4);
487-
if (!p)
488-
goto out_err_free;
489-
fh_count = be32_to_cpup(p);
537+
/* stateid */
538+
rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
539+
if (rc)
540+
goto out_err_free;
490541

491-
fls->mirror_array[i]->dss[dss_id].fh_versions =
492-
kcalloc(fh_count, sizeof(struct nfs_fh),
493-
gfp_flags);
494-
if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) {
495-
rc = -ENOMEM;
496-
goto out_err_free;
497-
}
542+
/* fh */
543+
rc = -EIO;
544+
p = xdr_inline_decode(&stream, 4);
545+
if (!p)
546+
goto out_err_free;
547+
fh_count = be32_to_cpup(p);
498548

499-
for (j = 0; j < fh_count; j++) {
500-
rc = decode_nfs_fh(&stream,
501-
&fls->mirror_array[i]->dss[dss_id].fh_versions[j]);
549+
dss_info->fh_versions =
550+
kcalloc(fh_count, sizeof(struct nfs_fh),
551+
gfp_flags);
552+
if (dss_info->fh_versions == NULL) {
553+
rc = -ENOMEM;
554+
goto out_err_free;
555+
}
556+
557+
for (j = 0; j < fh_count; j++) {
558+
rc = decode_nfs_fh(&stream,
559+
&dss_info->fh_versions[j]);
560+
if (rc)
561+
goto out_err_free;
562+
}
563+
564+
dss_info->fh_versions_cnt = fh_count;
565+
566+
/* user */
567+
rc = decode_name(&stream, &id);
502568
if (rc)
503569
goto out_err_free;
504-
}
505570

506-
fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count;
571+
uid = make_kuid(&init_user_ns, id);
507572

508-
/* user */
509-
rc = decode_name(&stream, &id);
510-
if (rc)
511-
goto out_err_free;
573+
/* group */
574+
rc = decode_name(&stream, &id);
575+
if (rc)
576+
goto out_err_free;
512577

513-
uid = make_kuid(&init_user_ns, id);
578+
gid = make_kgid(&init_user_ns, id);
514579

515-
/* group */
516-
rc = decode_name(&stream, &id);
517-
if (rc)
518-
goto out_err_free;
580+
if (gfp_flags & __GFP_FS)
581+
kcred = prepare_kernel_cred(&init_task);
582+
else {
583+
unsigned int nofs_flags = memalloc_nofs_save();
519584

520-
gid = make_kgid(&init_user_ns, id);
585+
kcred = prepare_kernel_cred(&init_task);
586+
memalloc_nofs_restore(nofs_flags);
587+
}
588+
rc = -ENOMEM;
589+
if (!kcred)
590+
goto out_err_free;
591+
kcred->fsuid = uid;
592+
kcred->fsgid = gid;
593+
cred = RCU_INITIALIZER(kcred);
521594

522-
if (gfp_flags & __GFP_FS)
523-
kcred = prepare_kernel_cred(&init_task);
524-
else {
525-
unsigned int nofs_flags = memalloc_nofs_save();
526-
kcred = prepare_kernel_cred(&init_task);
527-
memalloc_nofs_restore(nofs_flags);
595+
if (lgr->range.iomode == IOMODE_READ)
596+
rcu_assign_pointer(dss_info->ro_cred, cred);
597+
else
598+
rcu_assign_pointer(dss_info->rw_cred, cred);
528599
}
529-
rc = -ENOMEM;
530-
if (!kcred)
531-
goto out_err_free;
532-
kcred->fsuid = uid;
533-
kcred->fsgid = gid;
534-
cred = RCU_INITIALIZER(kcred);
535-
536-
if (lgr->range.iomode == IOMODE_READ)
537-
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
538-
else
539-
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
540600

541601
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
542602
if (mirror != fls->mirror_array[i]) {
543-
/* swap cred ptrs so free_mirror will clean up old */
544-
if (lgr->range.iomode == IOMODE_READ) {
545-
cred = xchg(&mirror->dss[dss_id].ro_cred,
546-
fls->mirror_array[i]->dss[dss_id].ro_cred);
547-
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
548-
} else {
549-
cred = xchg(&mirror->dss[dss_id].rw_cred,
550-
fls->mirror_array[i]->dss[dss_id].rw_cred);
551-
rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
603+
for (dss_id = 0; dss_id < dss_count; dss_id++) {
604+
dss_info = &fls->mirror_array[i]->dss[dss_id];
605+
/* swap cred ptrs so free_mirror will clean up old */
606+
if (lgr->range.iomode == IOMODE_READ) {
607+
cred = xchg(&mirror->dss[dss_id].ro_cred,
608+
dss_info->ro_cred);
609+
rcu_assign_pointer(dss_info->ro_cred, cred);
610+
} else {
611+
cred = xchg(&mirror->dss[dss_id].rw_cred,
612+
dss_info->rw_cred);
613+
rcu_assign_pointer(dss_info->rw_cred, cred);
614+
}
552615
}
553616
ff_layout_free_mirror(fls->mirror_array[i]);
554617
fls->mirror_array[i] = mirror;

fs/nfs/flexfilelayout/flexfilelayout.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
* due to network error etc. */
2222
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
2323

24+
#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096
25+
2426
/* LAYOUTSTATS report interval in ms */
2527
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
2628
#define FF_LAYOUTSTATS_MAXDEV 4

0 commit comments

Comments
 (0)