@@ -705,6 +705,31 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
705705 return bb_disk ;
706706}
707707
708+ static bool is_sequential (struct r1conf * conf , int disk , struct r1bio * r1_bio )
709+ {
710+ /* TODO: address issues with this check and concurrency. */
711+ return conf -> mirrors [disk ].next_seq_sect == r1_bio -> sector ||
712+ conf -> mirrors [disk ].head_position == r1_bio -> sector ;
713+ }
714+
715+ /*
716+ * If buffered sequential IO size exceeds optimal iosize, check if there is idle
717+ * disk. If yes, choose the idle disk.
718+ */
719+ static bool should_choose_next (struct r1conf * conf , int disk )
720+ {
721+ struct raid1_info * mirror = & conf -> mirrors [disk ];
722+ int opt_iosize ;
723+
724+ if (!test_bit (Nonrot , & mirror -> rdev -> flags ))
725+ return false;
726+
727+ opt_iosize = bdev_io_opt (mirror -> rdev -> bdev ) >> 9 ;
728+ return opt_iosize > 0 && mirror -> seq_start != MaxSector &&
729+ mirror -> next_seq_sect > opt_iosize &&
730+ mirror -> next_seq_sect - opt_iosize >= mirror -> seq_start ;
731+ }
732+
708733/*
709734 * This routine returns the disk from which the requested read should
710735 * be done. There is a per-array 'next expected sequential IO' sector
@@ -768,43 +793,21 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
768793 pending = atomic_read (& rdev -> nr_pending );
769794 dist = abs (this_sector - conf -> mirrors [disk ].head_position );
770795 /* Don't change to another disk for sequential reads */
771- if (conf -> mirrors [disk ].next_seq_sect == this_sector
772- || dist == 0 ) {
773- int opt_iosize = bdev_io_opt (rdev -> bdev ) >> 9 ;
774- struct raid1_info * mirror = & conf -> mirrors [disk ];
775-
776- /*
777- * If buffered sequential IO size exceeds optimal
778- * iosize, check if there is idle disk. If yes, choose
779- * the idle disk. read_balance could already choose an
780- * idle disk before noticing it's a sequential IO in
781- * this disk. This doesn't matter because this disk
782- * will idle, next time it will be utilized after the
783- * first disk has IO size exceeds optimal iosize. In
784- * this way, iosize of the first disk will be optimal
785- * iosize at least. iosize of the second disk might be
786- * small, but not a big deal since when the second disk
787- * starts IO, the first disk is likely still busy.
788- */
789- if (test_bit (Nonrot , & rdev -> flags ) && opt_iosize > 0 &&
790- mirror -> seq_start != MaxSector &&
791- mirror -> next_seq_sect > opt_iosize &&
792- mirror -> next_seq_sect - opt_iosize >=
793- mirror -> seq_start ) {
794- /*
795- * Add 'pending' to avoid choosing this disk if
796- * there is other idle disk.
797- */
798- pending ++ ;
799- /*
800- * If there is no other idle disk, this disk
801- * will be chosen.
802- */
803- sequential_disk = disk ;
804- } else {
796+ if (is_sequential (conf , disk , r1_bio )) {
797+ if (!should_choose_next (conf , disk )) {
805798 best_disk = disk ;
806799 break ;
807800 }
801+ /*
802+ * Add 'pending' to avoid choosing this disk if
803+ * there is other idle disk.
804+ */
805+ pending ++ ;
806+ /*
807+ * If there is no other idle disk, this disk
808+ * will be chosen.
809+ */
810+ sequential_disk = disk ;
808811 }
809812
810813 if (min_pending > pending ) {
0 commit comments