@@ -770,6 +770,7 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
770770static struct nfs4_pnfs_ds *
771771ff_layout_choose_ds_for_read (struct pnfs_layout_segment * lseg ,
772772 u32 start_idx , u32 * best_idx ,
773+ u32 offset , u32 * dss_id ,
773774 bool check_device )
774775{
775776 struct nfs4_ff_layout_segment * fls = FF_LAYOUT_LSEG (lseg );
@@ -780,12 +781,16 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
780781 /* mirrors are initially sorted by efficiency */
781782 for (idx = start_idx ; idx < fls -> mirror_array_cnt ; idx ++ ) {
782783 mirror = FF_LAYOUT_COMP (lseg , idx );
783- ds = nfs4_ff_layout_prepare_ds (lseg , mirror , 0 , false);
784+ * dss_id = nfs4_ff_layout_calc_dss_id (
785+ fls -> stripe_unit ,
786+ fls -> mirror_array [idx ]-> dss_count ,
787+ offset );
788+ ds = nfs4_ff_layout_prepare_ds (lseg , mirror , * dss_id , false);
784789 if (IS_ERR (ds ))
785790 continue ;
786791
787792 if (check_device &&
788- nfs4_test_deviceid_unavailable (& mirror -> dss [0 ].mirror_ds -> id_node )) {
793+ nfs4_test_deviceid_unavailable (& mirror -> dss [* dss_id ].mirror_ds -> id_node )) {
789794 // reinitialize the error state in case if this is the last iteration
790795 ds = ERR_PTR (- EINVAL );
791796 continue ;
@@ -800,42 +805,52 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
800805
801806static struct nfs4_pnfs_ds *
802807ff_layout_choose_any_ds_for_read (struct pnfs_layout_segment * lseg ,
803- u32 start_idx , u32 * best_idx )
808+ u32 start_idx , u32 * best_idx ,
809+ u32 offset , u32 * dss_id )
804810{
805- return ff_layout_choose_ds_for_read (lseg , start_idx , best_idx , false);
811+ return ff_layout_choose_ds_for_read (lseg , start_idx , best_idx ,
812+ offset , dss_id , false);
806813}
807814
808815static struct nfs4_pnfs_ds *
809816ff_layout_choose_valid_ds_for_read (struct pnfs_layout_segment * lseg ,
810- u32 start_idx , u32 * best_idx )
817+ u32 start_idx , u32 * best_idx ,
818+ u32 offset , u32 * dss_id )
811819{
812- return ff_layout_choose_ds_for_read (lseg , start_idx , best_idx , true);
820+ return ff_layout_choose_ds_for_read (lseg , start_idx , best_idx ,
821+ offset , dss_id , true);
813822}
814823
815824static struct nfs4_pnfs_ds *
816825ff_layout_choose_best_ds_for_read (struct pnfs_layout_segment * lseg ,
817- u32 start_idx , u32 * best_idx )
826+ u32 start_idx , u32 * best_idx ,
827+ u32 offset , u32 * dss_id )
818828{
819829 struct nfs4_pnfs_ds * ds ;
820830
821- ds = ff_layout_choose_valid_ds_for_read (lseg , start_idx , best_idx );
831+ ds = ff_layout_choose_valid_ds_for_read (lseg , start_idx , best_idx ,
832+ offset , dss_id );
822833 if (!IS_ERR (ds ))
823834 return ds ;
824- return ff_layout_choose_any_ds_for_read (lseg , start_idx , best_idx );
835+ return ff_layout_choose_any_ds_for_read (lseg , start_idx , best_idx ,
836+ offset , dss_id );
825837}
826838
827839static struct nfs4_pnfs_ds *
828840ff_layout_get_ds_for_read (struct nfs_pageio_descriptor * pgio ,
829- u32 * best_idx )
841+ u32 * best_idx ,
842+ u32 offset ,
843+ u32 * dss_id )
830844{
831845 struct pnfs_layout_segment * lseg = pgio -> pg_lseg ;
832846 struct nfs4_pnfs_ds * ds ;
833847
834848 ds = ff_layout_choose_best_ds_for_read (lseg , pgio -> pg_mirror_idx ,
835- best_idx );
849+ best_idx , offset , dss_id );
836850 if (!IS_ERR (ds ) || !pgio -> pg_mirror_idx )
837851 return ds ;
838- return ff_layout_choose_best_ds_for_read (lseg , 0 , best_idx );
852+ return ff_layout_choose_best_ds_for_read (lseg , 0 , best_idx ,
853+ offset , dss_id );
839854}
840855
841856static void
@@ -854,14 +869,64 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
854869 }
855870}
856871
872+ static bool
873+ ff_layout_lseg_is_striped (const struct nfs4_ff_layout_segment * fls )
874+ {
875+ return fls -> mirror_array [0 ]-> dss_count > 1 ;
876+ }
877+
878+ /*
879+ * ff_layout_pg_test(). Called by nfs_can_coalesce_requests()
880+ *
881+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
882+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
883+ */
884+ static size_t
885+ ff_layout_pg_test (struct nfs_pageio_descriptor * pgio , struct nfs_page * prev ,
886+ struct nfs_page * req )
887+ {
888+ unsigned int size ;
889+ u64 p_stripe , r_stripe ;
890+ u32 stripe_offset ;
891+ u64 segment_offset = pgio -> pg_lseg -> pls_range .offset ;
892+ u32 stripe_unit = FF_LAYOUT_LSEG (pgio -> pg_lseg )-> stripe_unit ;
893+
894+ /* calls nfs_generic_pg_test */
895+ size = pnfs_generic_pg_test (pgio , prev , req );
896+ if (!size )
897+ return 0 ;
898+ else if (!ff_layout_lseg_is_striped (FF_LAYOUT_LSEG (pgio -> pg_lseg )))
899+ return size ;
900+
901+ /* see if req and prev are in the same stripe */
902+ if (prev ) {
903+ p_stripe = (u64 )req_offset (prev ) - segment_offset ;
904+ r_stripe = (u64 )req_offset (req ) - segment_offset ;
905+ do_div (p_stripe , stripe_unit );
906+ do_div (r_stripe , stripe_unit );
907+
908+ if (p_stripe != r_stripe )
909+ return 0 ;
910+ }
911+
912+ /* calculate remaining bytes in the current stripe */
913+ div_u64_rem ((u64 )req_offset (req ) - segment_offset ,
914+ stripe_unit ,
915+ & stripe_offset );
916+ WARN_ON_ONCE (stripe_offset > stripe_unit );
917+ if (stripe_offset >= stripe_unit )
918+ return 0 ;
919+ return min (stripe_unit - (unsigned int )stripe_offset , size );
920+ }
921+
857922static void
858923ff_layout_pg_init_read (struct nfs_pageio_descriptor * pgio ,
859924 struct nfs_page * req )
860925{
861926 struct nfs_pgio_mirror * pgm ;
862927 struct nfs4_ff_layout_mirror * mirror ;
863928 struct nfs4_pnfs_ds * ds ;
864- u32 ds_idx ;
929+ u32 ds_idx , dss_id ;
865930
866931 if (NFS_SERVER (pgio -> pg_inode )-> flags &
867932 (NFS_MOUNT_SOFT |NFS_MOUNT_SOFTERR ))
@@ -882,7 +947,8 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
882947 /* Reset wb_nio, since getting layout segment was successful */
883948 req -> wb_nio = 0 ;
884949
885- ds = ff_layout_get_ds_for_read (pgio , & ds_idx );
950+ ds = ff_layout_get_ds_for_read (pgio , & ds_idx ,
951+ req_offset (req ), & dss_id );
886952 if (IS_ERR (ds )) {
887953 if (!ff_layout_no_fallback_to_mds (pgio -> pg_lseg ))
888954 goto out_mds ;
@@ -894,7 +960,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
894960
895961 mirror = FF_LAYOUT_COMP (pgio -> pg_lseg , ds_idx );
896962 pgm = & pgio -> pg_mirrors [0 ];
897- pgm -> pg_bsize = mirror -> dss [0 ].mirror_ds -> ds_versions [0 ].rsize ;
963+ pgm -> pg_bsize = mirror -> dss [dss_id ].mirror_ds -> ds_versions [0 ].rsize ;
898964
899965 pgio -> pg_mirror_idx = ds_idx ;
900966 return ;
@@ -1032,7 +1098,7 @@ ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
10321098
10331099static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
10341100 .pg_init = ff_layout_pg_init_read ,
1035- .pg_test = pnfs_generic_pg_test ,
1101+ .pg_test = ff_layout_pg_test ,
10361102 .pg_doio = pnfs_generic_pg_readpages ,
10371103 .pg_cleanup = pnfs_generic_pg_cleanup ,
10381104};
@@ -1087,9 +1153,11 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
10871153{
10881154 u32 idx = hdr -> pgio_mirror_idx + 1 ;
10891155 u32 new_idx = 0 ;
1156+ u32 dss_id = 0 ;
10901157 struct nfs4_pnfs_ds * ds ;
10911158
1092- ds = ff_layout_choose_any_ds_for_read (hdr -> lseg , idx , & new_idx );
1159+ ds = ff_layout_choose_any_ds_for_read (hdr -> lseg , idx , & new_idx ,
1160+ hdr -> args .offset , & dss_id );
10931161 if (IS_ERR (ds ))
10941162 pnfs_error_mark_layout_for_return (hdr -> inode , hdr -> lseg );
10951163 else
@@ -1884,41 +1952,46 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
18841952 u32 idx = hdr -> pgio_mirror_idx ;
18851953 int vers ;
18861954 struct nfs_fh * fh ;
1955+ u32 dss_id ;
18871956 bool ds_fatal_error = false;
18881957
18891958 dprintk ("--> %s ino %lu pgbase %u req %zu@%llu\n" ,
18901959 __func__ , hdr -> inode -> i_ino ,
18911960 hdr -> args .pgbase , (size_t )hdr -> args .count , offset );
18921961
18931962 mirror = FF_LAYOUT_COMP (lseg , idx );
1894- ds = nfs4_ff_layout_prepare_ds (lseg , mirror , 0 , false);
1963+ dss_id = nfs4_ff_layout_calc_dss_id (
1964+ FF_LAYOUT_LSEG (lseg )-> stripe_unit ,
1965+ mirror -> dss_count ,
1966+ offset );
1967+ ds = nfs4_ff_layout_prepare_ds (lseg , mirror , dss_id , false);
18951968 if (IS_ERR (ds )) {
18961969 ds_fatal_error = nfs_error_is_fatal (PTR_ERR (ds ));
18971970 goto out_failed ;
18981971 }
18991972
19001973 ds_clnt = nfs4_ff_find_or_create_ds_client (mirror , ds -> ds_clp ,
1901- hdr -> inode , 0 );
1974+ hdr -> inode , dss_id );
19021975 if (IS_ERR (ds_clnt ))
19031976 goto out_failed ;
19041977
1905- ds_cred = ff_layout_get_ds_cred (mirror , & lseg -> pls_range , hdr -> cred , 0 );
1978+ ds_cred = ff_layout_get_ds_cred (mirror , & lseg -> pls_range , hdr -> cred , dss_id );
19061979 if (!ds_cred )
19071980 goto out_failed ;
19081981
1909- vers = nfs4_ff_layout_ds_version (mirror , 0 );
1982+ vers = nfs4_ff_layout_ds_version (mirror , dss_id );
19101983
19111984 dprintk ("%s USE DS: %s cl_count %d vers %d\n" , __func__ ,
19121985 ds -> ds_remotestr , refcount_read (& ds -> ds_clp -> cl_count ), vers );
19131986
19141987 hdr -> pgio_done_cb = ff_layout_read_done_cb ;
19151988 refcount_inc (& ds -> ds_clp -> cl_count );
19161989 hdr -> ds_clp = ds -> ds_clp ;
1917- fh = nfs4_ff_layout_select_ds_fh (mirror , 0 );
1990+ fh = nfs4_ff_layout_select_ds_fh (mirror , dss_id );
19181991 if (fh )
19191992 hdr -> args .fh = fh ;
19201993
1921- nfs4_ff_layout_select_ds_stateid (mirror , 0 , & hdr -> args .stateid );
1994+ nfs4_ff_layout_select_ds_stateid (mirror , dss_id , & hdr -> args .stateid );
19221995
19231996 /*
19241997 * Note that if we ever decide to split across DSes,
@@ -1928,7 +2001,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
19282001 hdr -> mds_offset = offset ;
19292002
19302003 /* Start IO accounting for local read */
1931- localio = ff_local_open_fh (lseg , idx , 0 , ds -> ds_clp , ds_cred , fh , FMODE_READ );
2004+ localio = ff_local_open_fh (lseg , idx , dss_id , ds -> ds_clp , ds_cred , fh ,
2005+ FMODE_READ );
19322006 if (localio ) {
19332007 hdr -> task .tk_start = ktime_get ();
19342008 ff_layout_read_record_layoutstats_start (& hdr -> task , hdr );
0 commit comments