@@ -89,6 +89,41 @@ static int bnxt_qplib_map_rc(u8 opcode)
8989 }
9090}
9191
92+ /**
93+ * bnxt_re_is_fw_stalled - Check firmware health
94+ * @rcfw - rcfw channel instance of rdev
95+ * @cookie - cookie to track the command
96+ * @opcode - rcfw submitted for given opcode
97+ * @cbit - bitmap entry of cookie
98+ *
99+ * If firmware has not responded any rcfw command within
100+ * rcfw->max_timeout, consider firmware as stalled.
101+ *
102+ * Returns:
103+ * 0 if firmware is responding
104+ * -ENODEV if firmware is not responding
105+ */
106+ static int bnxt_re_is_fw_stalled (struct bnxt_qplib_rcfw * rcfw ,
107+ u16 cookie , u8 opcode , u16 cbit )
108+ {
109+ struct bnxt_qplib_cmdq_ctx * cmdq ;
110+
111+ cmdq = & rcfw -> cmdq ;
112+
113+ if (time_after (jiffies , cmdq -> last_seen +
114+ (rcfw -> max_timeout * HZ ))) {
115+ dev_warn_ratelimited (& rcfw -> pdev -> dev ,
116+ "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d " ,
117+ __func__ , cookie , opcode ,
118+ jiffies_to_msecs (jiffies - cmdq -> last_seen ),
119+ rcfw -> max_timeout * 1000 ,
120+ test_bit (cbit , cmdq -> cmdq_bitmap ));
121+ return - ENODEV ;
122+ }
123+
124+ return 0 ;
125+ }
126+
92127/**
93128 * __wait_for_resp - Don't hold the cpu context and wait for response
94129 * @rcfw - rcfw channel instance of rdev
@@ -105,6 +140,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
105140{
106141 struct bnxt_qplib_cmdq_ctx * cmdq ;
107142 u16 cbit ;
143+ int ret ;
108144
109145 cmdq = & rcfw -> cmdq ;
110146 cbit = cookie % rcfw -> cmdq_depth ;
@@ -118,8 +154,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
118154 wait_event_timeout (cmdq -> waitq ,
119155 !test_bit (cbit , cmdq -> cmdq_bitmap ) ||
120156 test_bit (ERR_DEVICE_DETACHED , & cmdq -> flags ),
121- msecs_to_jiffies (RCFW_FW_STALL_TIMEOUT_SEC
122- * 1000 ));
157+ msecs_to_jiffies (rcfw -> max_timeout * 1000 ));
158+
123159 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
124160 return 0 ;
125161
@@ -128,10 +164,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
128164 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
129165 return 0 ;
130166
131- /* Firmware stall is detected */
132- if (time_after (jiffies , cmdq -> last_seen +
133- (RCFW_FW_STALL_TIMEOUT_SEC * HZ )))
134- return - ENODEV ;
167+ ret = bnxt_re_is_fw_stalled (rcfw , cookie , opcode , cbit );
168+ if (ret )
169+ return ret ;
135170
136171 } while (true);
137172};
@@ -352,6 +387,7 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
352387 struct bnxt_qplib_cmdq_ctx * cmdq = & rcfw -> cmdq ;
353388 unsigned long issue_time ;
354389 u16 cbit ;
390+ int ret ;
355391
356392 cbit = cookie % rcfw -> cmdq_depth ;
357393 issue_time = jiffies ;
@@ -368,11 +404,10 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
368404 if (!test_bit (cbit , cmdq -> cmdq_bitmap ))
369405 return 0 ;
370406 if (jiffies_to_msecs (jiffies - issue_time ) >
371- (RCFW_FW_STALL_TIMEOUT_SEC * 1000 )) {
372- /* Firmware stall is detected */
373- if (time_after (jiffies , cmdq -> last_seen +
374- (RCFW_FW_STALL_TIMEOUT_SEC * HZ )))
375- return - ENODEV ;
407+ (rcfw -> max_timeout * 1000 )) {
408+ ret = bnxt_re_is_fw_stalled (rcfw , cookie , opcode , cbit );
409+ if (ret )
410+ return ret ;
376411 }
377412 } while (true);
378413};
@@ -951,6 +986,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
951986 if (!rcfw -> qp_tbl )
952987 goto fail ;
953988
989+ rcfw -> max_timeout = res -> cctx -> hwrm_cmd_max_timeout ;
990+
954991 return 0 ;
955992
956993fail :
0 commit comments