@@ -112,11 +112,13 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
112112 do {
113113 if (test_bit (ERR_DEVICE_DETACHED , & cmdq -> flags ))
114114 return bnxt_qplib_map_rc (opcode );
115+ if (test_bit (FIRMWARE_STALL_DETECTED , & cmdq -> flags ))
116+ return - ETIMEDOUT ;
115117
116- /* Non zero means command completed */
117118 wait_event_timeout (cmdq -> waitq ,
118119 !test_bit (cbit , cmdq -> cmdq_bitmap ),
119- msecs_to_jiffies (10000 ));
120+ msecs_to_jiffies (RCFW_FW_STALL_TIMEOUT_SEC
121+ * 1000 ));
120122
121123 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
122124 return 0 ;
@@ -126,6 +128,11 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
126128 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
127129 return 0 ;
128130
131+ /* Firmware stall is detected */
132+ if (time_after (jiffies , cmdq -> last_seen +
133+ (RCFW_FW_STALL_TIMEOUT_SEC * HZ )))
134+ return - ENODEV ;
135+
129136 } while (true);
130137};
131138
@@ -154,6 +161,8 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
154161 do {
155162 if (test_bit (ERR_DEVICE_DETACHED , & cmdq -> flags ))
156163 return bnxt_qplib_map_rc (opcode );
164+ if (test_bit (FIRMWARE_STALL_DETECTED , & cmdq -> flags ))
165+ return - ETIMEDOUT ;
157166
158167 udelay (1 );
159168
@@ -184,9 +193,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
184193 hwq = & cmdq -> hwq ;
185194 pdev = rcfw -> pdev ;
186195
187- if (test_bit (FIRMWARE_TIMED_OUT , & cmdq -> flags ))
188- return - ETIMEDOUT ;
189-
190196 /* Cmdq are in 16-byte units, each request can consume 1 or more
191197 * cmdqe
192198 */
@@ -285,14 +291,21 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
285291 do {
286292 if (test_bit (ERR_DEVICE_DETACHED , & cmdq -> flags ))
287293 return bnxt_qplib_map_rc (opcode );
294+ if (test_bit (FIRMWARE_STALL_DETECTED , & cmdq -> flags ))
295+ return - ETIMEDOUT ;
288296
289297 usleep_range (1000 , 1001 );
290298
291299 bnxt_qplib_service_creq (& rcfw -> creq .creq_tasklet );
292300 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
293301 return 0 ;
294- if (jiffies_to_msecs (jiffies - issue_time ) > 10000 )
295- return - ETIMEDOUT ;
302+ if (jiffies_to_msecs (jiffies - issue_time ) >
303+ (RCFW_FW_STALL_TIMEOUT_SEC * 1000 )) {
304+ /* Firmware stall is detected */
305+ if (time_after (jiffies , cmdq -> last_seen +
306+ (RCFW_FW_STALL_TIMEOUT_SEC * HZ )))
307+ return - ENODEV ;
308+ }
296309 } while (true);
297310};
298311
@@ -308,6 +321,8 @@ static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw,
308321 /* Prevent posting if f/w is not in a state to process */
309322 if (test_bit (ERR_DEVICE_DETACHED , & rcfw -> cmdq .flags ))
310323 return - ENXIO ;
324+ if (test_bit (FIRMWARE_STALL_DETECTED , & cmdq -> flags ))
325+ return - ETIMEDOUT ;
311326
312327 if (test_bit (FIRMWARE_INITIALIZED_FLAG , & cmdq -> flags ) &&
313328 opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW ) {
@@ -375,14 +390,15 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
375390 /* timed out */
376391 dev_err (& rcfw -> pdev -> dev , "cmdq[%#x]=%#x timedout (%d)msec\n" ,
377392 cookie , opcode , RCFW_CMD_WAIT_TIME_MS );
378- set_bit (FIRMWARE_TIMED_OUT , & rcfw -> cmdq .flags );
379393 return rc ;
380394 }
381395
382396 if (rc ) {
383397 spin_lock_irqsave (& rcfw -> cmdq .hwq .lock , flags );
384398 crsqe = & rcfw -> crsqe_tbl [cbit ];
385399 crsqe -> is_waiter_alive = false;
400+ if (rc == - ENODEV )
401+ set_bit (FIRMWARE_STALL_DETECTED , & rcfw -> cmdq .flags );
386402 spin_unlock_irqrestore (& rcfw -> cmdq .hwq .lock , flags );
387403 return - ETIMEDOUT ;
388404 }
@@ -533,6 +549,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
533549 cookie &= RCFW_MAX_COOKIE_VALUE ;
534550 cbit = cookie % rcfw -> cmdq_depth ;
535551 crsqe = & rcfw -> crsqe_tbl [cbit ];
552+
553+ if (WARN_ONCE (test_bit (FIRMWARE_STALL_DETECTED ,
554+ & rcfw -> cmdq .flags ),
555+ "QPLIB: Unreponsive rcfw channel detected.!!" )) {
556+ dev_info (& pdev -> dev ,
557+ "rcfw timedout: cookie = %#x, free_slots = %d" ,
558+ cookie , crsqe -> free_slots );
559+ spin_unlock_irqrestore (& hwq -> lock , flags );
560+ return rc ;
561+ }
562+
536563 if (!test_and_clear_bit (cbit , rcfw -> cmdq .cmdq_bitmap ))
537564 dev_warn (& pdev -> dev ,
538565 "CMD bit %d was not requested\n" , cbit );
@@ -582,6 +609,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
582609 * reading any further.
583610 */
584611 dma_rmb ();
612+ rcfw -> cmdq .last_seen = jiffies ;
585613
586614 type = creqe -> type & CREQ_BASE_TYPE_MASK ;
587615 switch (type ) {
0 commit comments