@@ -969,19 +969,32 @@ static void cmd_work_handler(struct work_struct *work)
969969 bool poll_cmd = ent -> polling ;
970970 struct mlx5_cmd_layout * lay ;
971971 struct mlx5_core_dev * dev ;
972- unsigned long cb_timeout ;
973- struct semaphore * sem ;
972+ unsigned long timeout ;
974973 unsigned long flags ;
975974 int alloc_ret ;
976975 int cmd_mode ;
977976
977+ complete (& ent -> handling );
978+
978979 dev = container_of (cmd , struct mlx5_core_dev , cmd );
979- cb_timeout = msecs_to_jiffies (mlx5_tout_ms (dev , CMD ));
980+ timeout = msecs_to_jiffies (mlx5_tout_ms (dev , CMD ));
980981
981- complete (& ent -> handling );
982- sem = ent -> page_queue ? & cmd -> vars .pages_sem : & cmd -> vars .sem ;
983- down (sem );
984982 if (!ent -> page_queue ) {
983+ if (down_timeout (& cmd -> vars .sem , timeout )) {
984+ mlx5_core_warn (dev , "%s(0x%x) timed out while waiting for a slot.\n" ,
985+ mlx5_command_str (ent -> op ), ent -> op );
986+ if (ent -> callback ) {
987+ ent -> callback (- EBUSY , ent -> context );
988+ mlx5_free_cmd_msg (dev , ent -> out );
989+ free_msg (dev , ent -> in );
990+ cmd_ent_put (ent );
991+ } else {
992+ ent -> ret = - EBUSY ;
993+ complete (& ent -> done );
994+ }
995+ complete (& ent -> slotted );
996+ return ;
997+ }
985998 alloc_ret = cmd_alloc_index (cmd , ent );
986999 if (alloc_ret < 0 ) {
9871000 mlx5_core_err_rl (dev , "failed to allocate command entry\n" );
@@ -994,17 +1007,20 @@ static void cmd_work_handler(struct work_struct *work)
9941007 ent -> ret = - EAGAIN ;
9951008 complete (& ent -> done );
9961009 }
997- up (sem );
1010+ up (& cmd -> vars . sem );
9981011 return ;
9991012 }
10001013 } else {
1014+ down (& cmd -> vars .pages_sem );
10011015 ent -> idx = cmd -> vars .max_reg_cmds ;
10021016 spin_lock_irqsave (& cmd -> alloc_lock , flags );
10031017 clear_bit (ent -> idx , & cmd -> vars .bitmask );
10041018 cmd -> ent_arr [ent -> idx ] = ent ;
10051019 spin_unlock_irqrestore (& cmd -> alloc_lock , flags );
10061020 }
10071021
1022+ complete (& ent -> slotted );
1023+
10081024 lay = get_inst (cmd , ent -> idx );
10091025 ent -> lay = lay ;
10101026 memset (lay , 0 , sizeof (* lay ));
@@ -1023,7 +1039,7 @@ static void cmd_work_handler(struct work_struct *work)
10231039 ent -> ts1 = ktime_get_ns ();
10241040 cmd_mode = cmd -> mode ;
10251041
1026- if (ent -> callback && schedule_delayed_work (& ent -> cb_timeout_work , cb_timeout ))
1042+ if (ent -> callback && schedule_delayed_work (& ent -> cb_timeout_work , timeout ))
10271043 cmd_ent_get (ent );
10281044 set_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state );
10291045
@@ -1143,6 +1159,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
11431159 ent -> ret = - ECANCELED ;
11441160 goto out_err ;
11451161 }
1162+
1163+ wait_for_completion (& ent -> slotted );
1164+
11461165 if (cmd -> mode == CMD_MODE_POLLING || ent -> polling )
11471166 wait_for_completion (& ent -> done );
11481167 else if (!wait_for_completion_timeout (& ent -> done , timeout ))
@@ -1157,6 +1176,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
11571176 } else if (err == - ECANCELED ) {
11581177 mlx5_core_warn (dev , "%s(0x%x) canceled on out of queue timeout.\n" ,
11591178 mlx5_command_str (ent -> op ), ent -> op );
1179+ } else if (err == - EBUSY ) {
1180+ mlx5_core_warn (dev , "%s(0x%x) timeout while waiting for command semaphore.\n" ,
1181+ mlx5_command_str (ent -> op ), ent -> op );
11601182 }
11611183 mlx5_core_dbg (dev , "err %d, delivery status %s(%d)\n" ,
11621184 err , deliv_status_to_str (ent -> status ), ent -> status );
@@ -1208,6 +1230,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
12081230 ent -> polling = force_polling ;
12091231
12101232 init_completion (& ent -> handling );
1233+ init_completion (& ent -> slotted );
12111234 if (!callback )
12121235 init_completion (& ent -> done );
12131236
@@ -1225,7 +1248,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
12251248 return 0 ; /* mlx5_cmd_comp_handler() will put(ent) */
12261249
12271250 err = wait_func (dev , ent );
1228- if (err == - ETIMEDOUT || err == - ECANCELED )
1251+ if (err == - ETIMEDOUT || err == - ECANCELED || err == - EBUSY )
12291252 goto out_free ;
12301253
12311254 ds = ent -> ts2 - ent -> ts1 ;
@@ -1611,6 +1634,9 @@ static int cmd_comp_notifier(struct notifier_block *nb,
16111634 dev = container_of (cmd , struct mlx5_core_dev , cmd );
16121635 eqe = data ;
16131636
1637+ if (dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR )
1638+ return NOTIFY_DONE ;
1639+
16141640 mlx5_cmd_comp_handler (dev , be32_to_cpu (eqe -> data .cmd .vector ), false);
16151641
16161642 return NOTIFY_OK ;
0 commit comments