@@ -206,10 +206,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
206206 };
207207}
208208
/*
 * Format the CPUs contained in @set into @buf as a list of decimal CPU
 * numbers, each followed by a single space (e.g. "0 3 7 ").
 *
 * @set: CPU set to print
 * @buf: destination buffer
 * @len: size of @buf in bytes
 *
 * When @len is non-zero, @buf is always NUL-terminated, including when
 * @set is empty. If the full list does not fit, the output stops after
 * the last CPU number that fit completely; nothing is ever written past
 * @buf + @len.
 */
static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	if (!len)
		return;

	/* An empty set must still yield a valid (empty) string. */
	buf[0] = '\0';

	for (i = 0; i < CPU_SETSIZE; i++) {
		int n;

		if (!CPU_ISSET(i, set))
			continue;

		n = snprintf(&buf[done], len - done, "%d ", i);
		/*
		 * snprintf() reports the length it *wanted* to write.
		 * Advancing by that on truncation would push 'done' past
		 * 'len' and make 'len - done' wrap around, so stop here and
		 * drop the partially written entry instead.
		 */
		if (n < 0 || (unsigned)n >= len - done) {
			buf[done] = '\0';
			break;
		}
		done += n;
	}
}
219+
/*
 * Reduce @set to its lowest-numbered CPU.
 *
 * Only the first CPU found in the set is kept; every higher-numbered CPU
 * is cleared. An empty set is left empty. Smarter automatic affinity
 * selection may replace this policy in the future.
 */
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int cpu = 0;

	/* Locate the first CPU present in the set; it is the one we keep. */
	while (cpu < CPU_SETSIZE && !CPU_ISSET(cpu, set))
		cpu++;

	/* Drop everything after it (clearing an absent CPU is a no-op). */
	for (cpu++; cpu < CPU_SETSIZE; cpu++)
		CPU_CLR(cpu, set);
}
239+
240+ /* Caller must free the allocated buffer */
241+ static int ublk_ctrl_get_affinity (struct ublk_dev * ctrl_dev , cpu_set_t * * ptr_buf )
242+ {
243+ struct ublk_ctrl_cmd_data data = {
244+ .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY ,
245+ .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF ,
246+ };
247+ cpu_set_t * buf ;
248+ int i , ret ;
249+
250+ buf = malloc (sizeof (cpu_set_t ) * ctrl_dev -> dev_info .nr_hw_queues );
251+ if (!buf )
252+ return - ENOMEM ;
253+
254+ for (i = 0 ; i < ctrl_dev -> dev_info .nr_hw_queues ; i ++ ) {
255+ data .data [0 ] = i ;
256+ data .len = sizeof (cpu_set_t );
257+ data .addr = (__u64 )& buf [i ];
258+
259+ ret = __ublk_ctrl_cmd (ctrl_dev , & data );
260+ if (ret < 0 ) {
261+ free (buf );
262+ return ret ;
263+ }
264+ ublk_adjust_affinity (& buf [i ]);
265+ }
266+
267+ * ptr_buf = buf ;
268+ return 0 ;
269+ }
270+
209271static void ublk_ctrl_dump (struct ublk_dev * dev )
210272{
211273 struct ublksrv_ctrl_dev_info * info = & dev -> dev_info ;
212274 struct ublk_params p ;
275+ cpu_set_t * affinity ;
213276 int ret ;
214277
215278 ret = ublk_ctrl_get_params (dev , & p );
@@ -218,12 +281,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
218281 return ;
219282 }
220283
284+ ret = ublk_ctrl_get_affinity (dev , & affinity );
285+ if (ret < 0 ) {
286+ ublk_err ("failed to get affinity %m\n" );
287+ return ;
288+ }
289+
221290 ublk_log ("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n" ,
222291 info -> dev_id , info -> nr_hw_queues , info -> queue_depth ,
223292 1 << p .basic .logical_bs_shift , p .basic .dev_sectors );
224293 ublk_log ("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n" ,
225294 info -> max_io_buf_bytes , info -> ublksrv_pid , info -> flags ,
226295 ublk_dev_state_desc (dev ));
296+
297+ if (affinity ) {
298+ char buf [512 ];
299+ int i ;
300+
301+ for (i = 0 ; i < info -> nr_hw_queues ; i ++ ) {
302+ ublk_print_cpu_set (& affinity [i ], buf , sizeof (buf ));
303+ printf ("\tqueue %u: tid %d affinity(%s)\n" ,
304+ i , dev -> q [i ].tid , buf );
305+ }
306+ free (affinity );
307+ }
308+
227309 fflush (stdout );
228310}
229311
@@ -603,9 +685,24 @@ static int ublk_process_io(struct ublk_queue *q)
603685 return reapped ;
604686}
605687
688+ static void ublk_queue_set_sched_affinity (const struct ublk_queue * q ,
689+ cpu_set_t * cpuset )
690+ {
691+ if (sched_setaffinity (0 , sizeof (* cpuset ), cpuset ) < 0 )
692+ ublk_err ("ublk dev %u queue %u set affinity failed" ,
693+ q -> dev -> dev_info .dev_id , q -> q_id );
694+ }
695+
696+ struct ublk_queue_info { /* per-queue argument handed to ublk_io_handler_fn() via pthread_create() */
697+ struct ublk_queue * q ; /* queue this handler thread serves */
698+ sem_t * queue_sem ; /* posted by the handler thread once it has applied its affinity */
699+ cpu_set_t * affinity ; /* CPU set the handler thread passes to ublk_queue_set_sched_affinity() */
700+ };
701+
606702static void * ublk_io_handler_fn (void * data )
607703{
608- struct ublk_queue * q = data ;
704+ struct ublk_queue_info * info = data ;
705+ struct ublk_queue * q = info -> q ;
609706 int dev_id = q -> dev -> dev_info .dev_id ;
610707 int ret ;
611708
@@ -615,6 +712,10 @@ static void *ublk_io_handler_fn(void *data)
615712 dev_id , q -> q_id );
616713 return NULL ;
617714 }
715+ /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
716+ ublk_queue_set_sched_affinity (q , info -> affinity );
717+ sem_post (info -> queue_sem );
718+
618719 ublk_dbg (UBLK_DBG_QUEUE , "tid %d: ublk dev %d queue %d started\n" ,
619720 q -> tid , dev_id , q -> q_id );
620721
@@ -640,7 +741,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
640741 dev -> dev_info .dev_id , ret );
641742}
642743
643- static int ublk_send_dev_event (const struct dev_ctx * ctx , int dev_id )
744+ static int ublk_send_dev_event (const struct dev_ctx * ctx , struct ublk_dev * dev , int dev_id )
644745{
645746 uint64_t id ;
646747 int evtfd = ctx -> _evtfd ;
@@ -653,35 +754,61 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
653754 else
654755 id = ERROR_EVTFD_DEVID ;
655756
757+ if (dev && ctx -> shadow_dev )
758+ memcpy (& ctx -> shadow_dev -> q , & dev -> q , sizeof (dev -> q ));
759+
656760 if (write (evtfd , & id , sizeof (id )) != sizeof (id ))
657761 return - EINVAL ;
658762
659763 close (evtfd );
764+ shmdt (ctx -> shadow_dev );
660765
661766 return 0 ;
662767}
663768
664769
665770static int ublk_start_daemon (const struct dev_ctx * ctx , struct ublk_dev * dev )
666771{
667- int ret , i ;
668- void * thread_ret ;
669772 const struct ublksrv_ctrl_dev_info * dinfo = & dev -> dev_info ;
773+ struct ublk_queue_info * qinfo ;
774+ cpu_set_t * affinity_buf ;
775+ void * thread_ret ;
776+ sem_t queue_sem ;
777+ int ret , i ;
670778
671779 ublk_dbg (UBLK_DBG_DEV , "%s enter\n" , __func__ );
672780
781+ qinfo = (struct ublk_queue_info * )calloc (sizeof (struct ublk_queue_info ),
782+ dinfo -> nr_hw_queues );
783+ if (!qinfo )
784+ return - ENOMEM ;
785+
786+ sem_init (& queue_sem , 0 , 0 );
673787 ret = ublk_dev_prep (ctx , dev );
674788 if (ret )
675789 return ret ;
676790
791+ ret = ublk_ctrl_get_affinity (dev , & affinity_buf );
792+ if (ret )
793+ return ret ;
794+
677795 for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ ) {
678796 dev -> q [i ].dev = dev ;
679797 dev -> q [i ].q_id = i ;
798+
799+ qinfo [i ].q = & dev -> q [i ];
800+ qinfo [i ].queue_sem = & queue_sem ;
801+ qinfo [i ].affinity = & affinity_buf [i ];
680802 pthread_create (& dev -> q [i ].thread , NULL ,
681803 ublk_io_handler_fn ,
682- & dev -> q [i ]);
804+ & qinfo [i ]);
683805 }
684806
807+ for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ )
808+ sem_wait (& queue_sem );
809+ free (qinfo );
810+ free (affinity_buf );
811+
685812 /* everything is fine now, start us */
686813 ublk_set_parameters (dev );
687814 ret = ublk_ctrl_start_dev (dev , getpid ());
@@ -694,7 +821,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
694821 if (ctx -> fg )
695822 ublk_ctrl_dump (dev );
696823 else
697- ublk_send_dev_event (ctx , dev -> dev_info .dev_id );
824+ ublk_send_dev_event (ctx , dev , dev -> dev_info .dev_id );
698825
699826 /* wait until we are terminated */
700827 for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ )
@@ -873,7 +1000,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
8731000
8741001fail :
8751002 if (ret < 0 )
876- ublk_send_dev_event (ctx , -1 );
1003+ ublk_send_dev_event (ctx , dev , -1 );
8771004 ublk_ctrl_deinit (dev );
8781005 return ret ;
8791006}
@@ -887,6 +1014,16 @@ static int cmd_dev_add(struct dev_ctx *ctx)
8871014 if (ctx -> fg )
8881015 goto run ;
8891016
1017+ ctx -> _shmid = shmget (IPC_PRIVATE , sizeof (struct ublk_dev ), IPC_CREAT | 0666 );
1018+ if (ctx -> _shmid < 0 ) {
1019+ ublk_err ("%s: failed to shmget %s\n" , __func__ , strerror (errno ));
1020+ exit (-1 );
1021+ }
1022+ ctx -> shadow_dev = (struct ublk_dev * )shmat (ctx -> _shmid , NULL , 0 );
1023+ if (ctx -> shadow_dev == (struct ublk_dev * )-1 ) {
1024+ ublk_err ("%s: failed to shmat %s\n" , __func__ , strerror (errno ));
1025+ exit (-1 );
1026+ }
8901027 ctx -> _evtfd = eventfd (0 , 0 );
8911028 if (ctx -> _evtfd < 0 ) {
8921029 ublk_err ("%s: failed to create eventfd %s\n" , __func__ , strerror (errno ));
@@ -922,6 +1059,8 @@ static int cmd_dev_add(struct dev_ctx *ctx)
9221059 if (__cmd_dev_list (ctx ) >= 0 )
9231060 exit_code = EXIT_SUCCESS ;
9241061 }
1062+ shmdt (ctx -> shadow_dev );
1063+ shmctl (ctx -> _shmid , IPC_RMID , NULL );
9251064 /* wait for child and detach from it */
9261065 wait (NULL );
9271066 exit (exit_code );
@@ -988,6 +1127,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
9881127 ublk_err ("%s: can't get dev info from %d: %d\n" ,
9891128 __func__ , ctx -> dev_id , ret );
9901129 } else {
1130+ if (ctx -> shadow_dev )
1131+ memcpy (& dev -> q , ctx -> shadow_dev -> q , sizeof (dev -> q ));
1132+
9911133 ublk_ctrl_dump (dev );
9921134 }
9931135
0 commit comments