Skip to content

Commit 4934ccb

Browse files
Jonathan Curley authored and Anna Schumaker committed
NFSv4/flexfiles: Read path updates for striped layouts
Update the read path to calculate the dss_id from the I/O offset and use it to direct I/O to the appropriate stripe DS (data server).

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
1 parent a149191 commit 4934ccb

1 file changed

Lines changed: 98 additions & 24 deletions

File tree

fs/nfs/flexfilelayout/flexfilelayout.c

Lines changed: 98 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,7 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
770770
static struct nfs4_pnfs_ds *
771771
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
772772
u32 start_idx, u32 *best_idx,
773+
u32 offset, u32 *dss_id,
773774
bool check_device)
774775
{
775776
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
@@ -780,12 +781,16 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
780781
/* mirrors are initially sorted by efficiency */
781782
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
782783
mirror = FF_LAYOUT_COMP(lseg, idx);
783-
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false);
784+
*dss_id = nfs4_ff_layout_calc_dss_id(
785+
fls->stripe_unit,
786+
fls->mirror_array[idx]->dss_count,
787+
offset);
788+
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false);
784789
if (IS_ERR(ds))
785790
continue;
786791

787792
if (check_device &&
788-
nfs4_test_deviceid_unavailable(&mirror->dss[0].mirror_ds->id_node)) {
793+
nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) {
789794
// reinitialize the error state in case if this is the last iteration
790795
ds = ERR_PTR(-EINVAL);
791796
continue;
@@ -800,42 +805,52 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
800805

801806
static struct nfs4_pnfs_ds *
802807
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
803-
u32 start_idx, u32 *best_idx)
808+
u32 start_idx, u32 *best_idx,
809+
u32 offset, u32 *dss_id)
804810
{
805-
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
811+
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
812+
offset, dss_id, false);
806813
}
807814

808815
static struct nfs4_pnfs_ds *
809816
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
810-
u32 start_idx, u32 *best_idx)
817+
u32 start_idx, u32 *best_idx,
818+
u32 offset, u32 *dss_id)
811819
{
812-
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
820+
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
821+
offset, dss_id, true);
813822
}
814823

815824
static struct nfs4_pnfs_ds *
816825
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
817-
u32 start_idx, u32 *best_idx)
826+
u32 start_idx, u32 *best_idx,
827+
u32 offset, u32 *dss_id)
818828
{
819829
struct nfs4_pnfs_ds *ds;
820830

821-
ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
831+
ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx,
832+
offset, dss_id);
822833
if (!IS_ERR(ds))
823834
return ds;
824-
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
835+
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx,
836+
offset, dss_id);
825837
}
826838

827839
static struct nfs4_pnfs_ds *
828840
ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
829-
u32 *best_idx)
841+
u32 *best_idx,
842+
u32 offset,
843+
u32 *dss_id)
830844
{
831845
struct pnfs_layout_segment *lseg = pgio->pg_lseg;
832846
struct nfs4_pnfs_ds *ds;
833847

834848
ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
835-
best_idx);
849+
best_idx, offset, dss_id);
836850
if (!IS_ERR(ds) || !pgio->pg_mirror_idx)
837851
return ds;
838-
return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
852+
return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx,
853+
offset, dss_id);
839854
}
840855

841856
static void
@@ -854,14 +869,64 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
854869
}
855870
}
856871

872+
static bool
873+
ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls)
874+
{
875+
return fls->mirror_array[0]->dss_count > 1;
876+
}
877+
878+
/*
879+
* ff_layout_pg_test(). Called by nfs_can_coalesce_requests()
880+
*
881+
* Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
882+
* of bytes (maximum @req->wb_bytes) that can be coalesced.
883+
*/
884+
static size_t
885+
ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
886+
struct nfs_page *req)
887+
{
888+
unsigned int size;
889+
u64 p_stripe, r_stripe;
890+
u32 stripe_offset;
891+
u64 segment_offset = pgio->pg_lseg->pls_range.offset;
892+
u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
893+
894+
/* calls nfs_generic_pg_test */
895+
size = pnfs_generic_pg_test(pgio, prev, req);
896+
if (!size)
897+
return 0;
898+
else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg)))
899+
return size;
900+
901+
/* see if req and prev are in the same stripe */
902+
if (prev) {
903+
p_stripe = (u64)req_offset(prev) - segment_offset;
904+
r_stripe = (u64)req_offset(req) - segment_offset;
905+
do_div(p_stripe, stripe_unit);
906+
do_div(r_stripe, stripe_unit);
907+
908+
if (p_stripe != r_stripe)
909+
return 0;
910+
}
911+
912+
/* calculate remaining bytes in the current stripe */
913+
div_u64_rem((u64)req_offset(req) - segment_offset,
914+
stripe_unit,
915+
&stripe_offset);
916+
WARN_ON_ONCE(stripe_offset > stripe_unit);
917+
if (stripe_offset >= stripe_unit)
918+
return 0;
919+
return min(stripe_unit - (unsigned int)stripe_offset, size);
920+
}
921+
857922
static void
858923
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
859924
struct nfs_page *req)
860925
{
861926
struct nfs_pgio_mirror *pgm;
862927
struct nfs4_ff_layout_mirror *mirror;
863928
struct nfs4_pnfs_ds *ds;
864-
u32 ds_idx;
929+
u32 ds_idx, dss_id;
865930

866931
if (NFS_SERVER(pgio->pg_inode)->flags &
867932
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
@@ -882,7 +947,8 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
882947
/* Reset wb_nio, since getting layout segment was successful */
883948
req->wb_nio = 0;
884949

885-
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
950+
ds = ff_layout_get_ds_for_read(pgio, &ds_idx,
951+
req_offset(req), &dss_id);
886952
if (IS_ERR(ds)) {
887953
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
888954
goto out_mds;
@@ -894,7 +960,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
894960

895961
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
896962
pgm = &pgio->pg_mirrors[0];
897-
pgm->pg_bsize = mirror->dss[0].mirror_ds->ds_versions[0].rsize;
963+
pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize;
898964

899965
pgio->pg_mirror_idx = ds_idx;
900966
return;
@@ -1032,7 +1098,7 @@ ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
10321098

10331099
static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
10341100
.pg_init = ff_layout_pg_init_read,
1035-
.pg_test = pnfs_generic_pg_test,
1101+
.pg_test = ff_layout_pg_test,
10361102
.pg_doio = pnfs_generic_pg_readpages,
10371103
.pg_cleanup = pnfs_generic_pg_cleanup,
10381104
};
@@ -1087,9 +1153,11 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
10871153
{
10881154
u32 idx = hdr->pgio_mirror_idx + 1;
10891155
u32 new_idx = 0;
1156+
u32 dss_id = 0;
10901157
struct nfs4_pnfs_ds *ds;
10911158

1092-
ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx);
1159+
ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx,
1160+
hdr->args.offset, &dss_id);
10931161
if (IS_ERR(ds))
10941162
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
10951163
else
@@ -1884,41 +1952,46 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
18841952
u32 idx = hdr->pgio_mirror_idx;
18851953
int vers;
18861954
struct nfs_fh *fh;
1955+
u32 dss_id;
18871956
bool ds_fatal_error = false;
18881957

18891958
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
18901959
__func__, hdr->inode->i_ino,
18911960
hdr->args.pgbase, (size_t)hdr->args.count, offset);
18921961

18931962
mirror = FF_LAYOUT_COMP(lseg, idx);
1894-
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false);
1963+
dss_id = nfs4_ff_layout_calc_dss_id(
1964+
FF_LAYOUT_LSEG(lseg)->stripe_unit,
1965+
mirror->dss_count,
1966+
offset);
1967+
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false);
18951968
if (IS_ERR(ds)) {
18961969
ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
18971970
goto out_failed;
18981971
}
18991972

19001973
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
1901-
hdr->inode, 0);
1974+
hdr->inode, dss_id);
19021975
if (IS_ERR(ds_clnt))
19031976
goto out_failed;
19041977

1905-
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, 0);
1978+
ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id);
19061979
if (!ds_cred)
19071980
goto out_failed;
19081981

1909-
vers = nfs4_ff_layout_ds_version(mirror, 0);
1982+
vers = nfs4_ff_layout_ds_version(mirror, dss_id);
19101983

19111984
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
19121985
ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
19131986

19141987
hdr->pgio_done_cb = ff_layout_read_done_cb;
19151988
refcount_inc(&ds->ds_clp->cl_count);
19161989
hdr->ds_clp = ds->ds_clp;
1917-
fh = nfs4_ff_layout_select_ds_fh(mirror, 0);
1990+
fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id);
19181991
if (fh)
19191992
hdr->args.fh = fh;
19201993

1921-
nfs4_ff_layout_select_ds_stateid(mirror, 0, &hdr->args.stateid);
1994+
nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid);
19221995

19231996
/*
19241997
* Note that if we ever decide to split across DSes,
@@ -1928,7 +2001,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
19282001
hdr->mds_offset = offset;
19292002

19302003
/* Start IO accounting for local read */
1931-
localio = ff_local_open_fh(lseg, idx, 0, ds->ds_clp, ds_cred, fh, FMODE_READ);
2004+
localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
2005+
FMODE_READ);
19322006
if (localio) {
19332007
hdr->task.tk_start = ktime_get();
19342008
ff_layout_read_record_layoutstats_start(&hdr->task, hdr);

0 commit comments

Comments
 (0)