Skip to content

Commit 17488f1

Browse files
abattersbymartinkpetersen
authored andcommitted
scsi: qla2xxx: target: Fix races with aborting commands
cmd->cmd_lock only protects cmd->aborted, but when deciding how to process a cmd, it is necessary to consider other factors such as cmd->state and if the chip has been reset, which are protected by qpair->qp_lock_ptr. So replace cmd_lock with qp_lock_ptr, whick makes it possible to check additional values and make decisions about what to do without racing with the CTIO handler and other code. - Lock cmd->qpair->qp_lock_ptr when aborting a cmd. - Eliminate cmd->cmd_lock and change cmd->aborted to a bitfield since it is now protected by qp_lock_ptr just like all the other flags. - Add another command state QLA_TGT_STATE_DONE to avoid any possible races between qlt_abort_cmd() and tgt_ops->free_cmd(). - Add the cmd->sent_term_exchg flag to indicate if qlt_send_term_exchange() has already been called. - Export qlt_send_term_exchange() for SCST so that it can be called directly instead of trying to make qlt_abort_cmd() work for both TMR abort and HW timeout. Signed-off-by: Tony Battersby <tonyb@cybernetics.com> Link: https://patch.msgid.link/2c8d03e4-308b-4d5a-a418-a334be23f815@cybernetics.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent d46c69a commit 17488f1

4 files changed

Lines changed: 102 additions & 116 deletions

File tree

drivers/scsi/qla2xxx/qla_os.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7248,6 +7248,7 @@ qla2xxx_wake_dpc(struct scsi_qla_host *vha)
72487248
if (!test_bit(UNLOADING, &vha->dpc_flags) && t)
72497249
wake_up_process(t);
72507250
}
7251+
EXPORT_SYMBOL(qla2xxx_wake_dpc);
72517252

72527253
/*
72537254
* qla2x00_rst_aen

drivers/scsi/qla2xxx/qla_target.c

Lines changed: 89 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,6 @@ static void qlt_response_pkt(struct scsi_qla_host *ha, struct rsp_que *rsp,
104104
response_t *pkt);
105105
static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun,
106106
int fn, void *iocb, int flags);
107-
static void qlt_send_term_exchange(struct qla_qpair *, struct qla_tgt_cmd
108-
*cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort);
109107
static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
110108
struct atio_from_isp *atio, uint16_t status, int qfull);
111109
static void qlt_disable_vha(struct scsi_qla_host *vha);
@@ -136,20 +134,6 @@ static struct workqueue_struct *qla_tgt_wq;
136134
static DEFINE_MUTEX(qla_tgt_mutex);
137135
static LIST_HEAD(qla_tgt_glist);
138136

139-
static const char *prot_op_str(u32 prot_op)
140-
{
141-
switch (prot_op) {
142-
case TARGET_PROT_NORMAL: return "NORMAL";
143-
case TARGET_PROT_DIN_INSERT: return "DIN_INSERT";
144-
case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT";
145-
case TARGET_PROT_DIN_STRIP: return "DIN_STRIP";
146-
case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP";
147-
case TARGET_PROT_DIN_PASS: return "DIN_PASS";
148-
case TARGET_PROT_DOUT_PASS: return "DOUT_PASS";
149-
default: return "UNKNOWN";
150-
}
151-
}
152-
153137
/* This API intentionally takes dest as a parameter, rather than returning
154138
* int value to avoid caller forgetting to issue wmb() after the store */
155139
void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
@@ -252,7 +236,7 @@ static void qlt_queue_unknown_atio(scsi_qla_host_t *vha,
252236
return;
253237

254238
out_term:
255-
qlt_send_term_exchange(vha->hw->base_qpair, NULL, atio, ha_locked, 0);
239+
qlt_send_term_exchange(vha->hw->base_qpair, NULL, atio, ha_locked);
256240
goto out;
257241
}
258242

@@ -271,7 +255,7 @@ static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha,
271255
"Freeing unknown %s %p, because of Abort\n",
272256
"ATIO_TYPE7", u);
273257
qlt_send_term_exchange(vha->hw->base_qpair, NULL,
274-
&u->atio, ha_locked, 0);
258+
&u->atio, ha_locked);
275259
goto abort;
276260
}
277261

@@ -285,7 +269,7 @@ static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha,
285269
"Freeing unknown %s %p, because tgt is being stopped\n",
286270
"ATIO_TYPE7", u);
287271
qlt_send_term_exchange(vha->hw->base_qpair, NULL,
288-
&u->atio, ha_locked, 0);
272+
&u->atio, ha_locked);
289273
} else {
290274
ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0x503d,
291275
"Reschedule u %p, vha %p, host %p\n", u, vha, host);
@@ -3461,7 +3445,6 @@ qlt_handle_dif_error(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd,
34613445
uint8_t *ep = &sts->expected_dif[0];
34623446
uint64_t lba = cmd->se_cmd.t_task_lba;
34633447
uint8_t scsi_status, sense_key, asc, ascq;
3464-
unsigned long flags;
34653448
struct scsi_qla_host *vha = cmd->vha;
34663449

34673450
cmd->trc_flags |= TRC_DIF_ERR;
@@ -3535,13 +3518,10 @@ qlt_handle_dif_error(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd,
35353518
vha->hw->tgt.tgt_ops->handle_data(cmd);
35363519
break;
35373520
default:
3538-
spin_lock_irqsave(&cmd->cmd_lock, flags);
3539-
if (cmd->aborted) {
3540-
spin_unlock_irqrestore(&cmd->cmd_lock, flags);
3521+
if (cmd->sent_term_exchg) {
35413522
vha->hw->tgt.tgt_ops->free_cmd(cmd);
35423523
break;
35433524
}
3544-
spin_unlock_irqrestore(&cmd->cmd_lock, flags);
35453525

35463526
qlt_send_resp_ctio(qpair, cmd, scsi_status, sense_key, asc,
35473527
ascq);
@@ -3696,9 +3676,22 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair,
36963676
return 0;
36973677
}
36983678

3699-
static void qlt_send_term_exchange(struct qla_qpair *qpair,
3700-
struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked,
3701-
int ul_abort)
3679+
/*
3680+
* Aborting a command that is active in the FW (i.e. cmd->cmd_sent_to_fw == 1)
3681+
* will usually trigger the FW to send a completion CTIO with error status,
3682+
* and the driver will then call the ->handle_data() or ->free_cmd() callbacks.
3683+
* This can be used to clear a command that is locked up in the FW unless there
3684+
* is something more seriously wrong.
3685+
*
3686+
* Aborting a command that is not active in the FW (i.e.
3687+
* cmd->cmd_sent_to_fw == 0) will not directly trigger any callbacks. Instead,
3688+
* when the target mode midlevel calls qlt_rdy_to_xfer() or
3689+
* qlt_xmit_response(), the driver will see that the cmd has been aborted and
3690+
* call the appropriate callback immediately without performing the requested
3691+
* operation.
3692+
*/
3693+
void qlt_send_term_exchange(struct qla_qpair *qpair,
3694+
struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked)
37023695
{
37033696
struct scsi_qla_host *vha;
37043697
unsigned long flags = 0;
@@ -3722,17 +3715,22 @@ static void qlt_send_term_exchange(struct qla_qpair *qpair,
37223715
qlt_alloc_qfull_cmd(vha, atio, 0, 0);
37233716

37243717
done:
3725-
if (cmd && !ul_abort && !cmd->aborted) {
3726-
if (cmd->sg_mapped)
3727-
qlt_unmap_sg(vha, cmd);
3728-
vha->hw->tgt.tgt_ops->free_cmd(cmd);
3718+
if (cmd) {
3719+
/*
3720+
* Set this even if -ENOMEM above, since term exchange will be
3721+
* sent eventually...
3722+
*/
3723+
cmd->sent_term_exchg = 1;
3724+
cmd->aborted = 1;
3725+
cmd->jiffies_at_term_exchg = jiffies;
37293726
}
37303727

37313728
if (!ha_locked)
37323729
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
37333730

37343731
return;
37353732
}
3733+
EXPORT_SYMBOL(qlt_send_term_exchange);
37363734

37373735
static void qlt_init_term_exchange(struct scsi_qla_host *vha)
37383736
{
@@ -3783,35 +3781,35 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha)
37833781

37843782
int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
37853783
{
3786-
struct qla_tgt *tgt = cmd->tgt;
3787-
struct scsi_qla_host *vha = tgt->vha;
3788-
struct se_cmd *se_cmd = &cmd->se_cmd;
3784+
struct scsi_qla_host *vha = cmd->vha;
3785+
struct qla_qpair *qpair = cmd->qpair;
37893786
unsigned long flags;
37903787

3791-
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014,
3792-
"qla_target(%d): terminating exchange for aborted cmd=%p "
3793-
"(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd,
3794-
se_cmd->tag);
3788+
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
37953789

3796-
spin_lock_irqsave(&cmd->cmd_lock, flags);
3797-
if (cmd->aborted) {
3798-
spin_unlock_irqrestore(&cmd->cmd_lock, flags);
3799-
/*
3800-
* It's normal to see 2 calls in this path:
3801-
* 1) XFER Rdy completion + CMD_T_ABORT
3802-
* 2) TCM TMR - drain_state_list
3803-
*/
3804-
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf016,
3805-
"multiple abort. %p transport_state %x, t_state %x, "
3806-
"se_cmd_flags %x\n", cmd, cmd->se_cmd.transport_state,
3807-
cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags);
3808-
return -EIO;
3790+
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014,
3791+
"qla_target(%d): tag %lld: cmd being aborted (state %d) %s; %s\n",
3792+
vha->vp_idx, cmd->se_cmd.tag, cmd->state,
3793+
cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
3794+
cmd->aborted ? "aborted" : "not aborted");
3795+
3796+
if (cmd->state != QLA_TGT_STATE_DONE && !cmd->sent_term_exchg) {
3797+
if (!qpair->fw_started ||
3798+
cmd->reset_count != qpair->chip_reset) {
3799+
/*
3800+
* Chip was reset; just pretend that we sent the term
3801+
* exchange.
3802+
*/
3803+
cmd->sent_term_exchg = 1;
3804+
cmd->aborted = 1;
3805+
cmd->jiffies_at_term_exchg = jiffies;
3806+
} else {
3807+
qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1);
3808+
}
38093809
}
3810-
cmd->aborted = 1;
3811-
cmd->trc_flags |= TRC_ABORT;
3812-
spin_unlock_irqrestore(&cmd->cmd_lock, flags);
38133810

3814-
qlt_send_term_exchange(cmd->qpair, cmd, &cmd->atio, 0, 1);
3811+
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3812+
38153813
return 0;
38163814
}
38173815
EXPORT_SYMBOL(qlt_abort_cmd);
@@ -3842,40 +3840,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd)
38423840
}
38433841
EXPORT_SYMBOL(qlt_free_cmd);
38443842

3845-
/*
3846-
* ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire
3847-
*/
3848-
static int qlt_term_ctio_exchange(struct qla_qpair *qpair, void *ctio,
3849-
struct qla_tgt_cmd *cmd, uint32_t status)
3850-
{
3851-
int term = 0;
3852-
struct scsi_qla_host *vha = qpair->vha;
3853-
3854-
if (cmd->se_cmd.prot_op)
3855-
ql_dbg(ql_dbg_tgt_dif, vha, 0xe013,
3856-
"Term DIF cmd: lba[0x%llx|%lld] len[0x%x] "
3857-
"se_cmd=%p tag[%x] op %#x/%s",
3858-
cmd->lba, cmd->lba,
3859-
cmd->num_blks, &cmd->se_cmd,
3860-
cmd->atio.u.isp24.exchange_addr,
3861-
cmd->se_cmd.prot_op,
3862-
prot_op_str(cmd->se_cmd.prot_op));
3863-
3864-
if (ctio != NULL) {
3865-
struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio;
3866-
3867-
term = !(c->flags &
3868-
cpu_to_le16(OF_TERM_EXCH));
3869-
} else
3870-
term = 1;
3871-
3872-
if (term)
3873-
qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1, 0);
3874-
3875-
return term;
3876-
}
3877-
3878-
38793843
/* ha->hardware_lock supposed to be held on entry */
38803844
static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha,
38813845
struct rsp_que *rsp, uint32_t handle, void *ctio)
@@ -3981,22 +3945,35 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha,
39813945
qlt_unmap_sg(vha, cmd);
39823946

39833947
if (unlikely(status != CTIO_SUCCESS)) {
3948+
bool term_exchg = false;
3949+
3950+
/*
3951+
* If the hardware terminated the exchange, then we don't need
3952+
* to send an explicit term exchange message.
3953+
*/
3954+
if (ctio_flags & OF_TERM_EXCH) {
3955+
cmd->sent_term_exchg = 1;
3956+
cmd->aborted = 1;
3957+
cmd->jiffies_at_term_exchg = jiffies;
3958+
}
3959+
39843960
switch (status & 0xFFFF) {
39853961
case CTIO_INVALID_RX_ID:
3962+
term_exchg = true;
39863963
if (printk_ratelimit())
39873964
dev_info(&vha->hw->pdev->dev,
39883965
"qla_target(%d): CTIO with INVALID_RX_ID ATIO attr %x CTIO Flags %x|%x\n",
39893966
vha->vp_idx, cmd->atio.u.isp24.attr,
39903967
((cmd->ctio_flags >> 9) & 0xf),
39913968
cmd->ctio_flags);
3992-
39933969
break;
3970+
39943971
case CTIO_LIP_RESET:
39953972
case CTIO_TARGET_RESET:
39963973
case CTIO_ABORTED:
3997-
/* driver request abort via Terminate exchange */
3974+
term_exchg = true;
3975+
fallthrough;
39983976
case CTIO_TIMEOUT:
3999-
/* They are OK */
40003977
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf058,
40013978
"qla_target(%d): CTIO with "
40023979
"status %#x received, state %x, se_cmd %p, "
@@ -4017,6 +3994,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha,
40173994
logged_out ? "PORT LOGGED OUT" : "PORT UNAVAILABLE",
40183995
status, cmd->state, se_cmd);
40193996

3997+
term_exchg = true;
40203998
if (logged_out && cmd->sess) {
40213999
/*
40224000
* Session is already logged out, but we need
@@ -4062,19 +4040,21 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha,
40624040
break;
40634041
}
40644042

4043+
cmd->trc_flags |= TRC_CTIO_ERR;
40654044

4066-
/* "cmd->aborted" means
4067-
* cmd is already aborted/terminated, we don't
4068-
* need to terminate again. The exchange is already
4069-
* cleaned up/freed at FW level. Just cleanup at driver
4070-
* level.
4045+
/*
4046+
* In state QLA_TGT_STATE_NEED_DATA the failed CTIO was for
4047+
* Data-Out, so either abort the exchange or try sending check
4048+
* condition with sense data depending on the severity of
4049+
* the error. In state QLA_TGT_STATE_PROCESSED the failed CTIO
4050+
* was for status (and possibly Data-In), so don't try sending
4051+
* an error status again in that case (if the error was for
4052+
* Data-In with status, we could try sending status without
4053+
* Data-In, but we don't do that currently).
40714054
*/
4072-
if ((cmd->state != QLA_TGT_STATE_NEED_DATA) &&
4073-
(!cmd->aborted)) {
4074-
cmd->trc_flags |= TRC_CTIO_ERR;
4075-
if (qlt_term_ctio_exchange(qpair, ctio, cmd, status))
4076-
return;
4077-
}
4055+
if (!cmd->sent_term_exchg &&
4056+
(term_exchg || cmd->state != QLA_TGT_STATE_NEED_DATA))
4057+
qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1);
40784058
}
40794059

40804060
if (cmd->state == QLA_TGT_STATE_PROCESSED) {
@@ -4164,7 +4144,6 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd)
41644144
goto out_term;
41654145
}
41664146

4167-
spin_lock_init(&cmd->cmd_lock);
41684147
cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
41694148
cmd->se_cmd.tag = le32_to_cpu(atio->u.isp24.exchange_addr);
41704149

@@ -4201,7 +4180,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd)
42014180
*/
42024181
cmd->trc_flags |= TRC_DO_WORK_ERR;
42034182
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
4204-
qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1, 0);
4183+
qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1);
42054184

42064185
qlt_decr_num_pend_cmds(vha);
42074186
cmd->vha->hw->tgt.tgt_ops->rel_cmd(cmd);
@@ -5360,6 +5339,7 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha,
53605339
if (qlt_24xx_handle_els(vha, iocb) == 0)
53615340
send_notify_ack = 0;
53625341
break;
5342+
53635343
default:
53645344
ql_dbg(ql_dbg_tgt_mgt, vha, 0xf06d,
53655345
"qla_target(%d): Received unknown immediate "
@@ -5394,7 +5374,7 @@ static int __qlt_send_busy(struct qla_qpair *qpair,
53945374
sess = qla2x00_find_fcport_by_nportid(vha, &id, 1);
53955375
spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
53965376
if (!sess) {
5397-
qlt_send_term_exchange(qpair, NULL, atio, 1, 0);
5377+
qlt_send_term_exchange(qpair, NULL, atio, 1);
53985378
return 0;
53995379
}
54005380
/* Sending marker isn't necessary, since we called from ISR */
@@ -5623,7 +5603,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha,
56235603
ql_dbg(ql_dbg_tgt, vha, 0xe05f,
56245604
"qla_target: Unable to send command to target, sending TERM EXCHANGE for rsp\n");
56255605
qlt_send_term_exchange(ha->base_qpair, NULL,
5626-
atio, 1, 0);
5606+
atio, 1);
56275607
break;
56285608
case -EBUSY:
56295609
ql_dbg(ql_dbg_tgt, vha, 0xe060,
@@ -5830,7 +5810,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha,
58305810
ql_dbg(ql_dbg_tgt, vha, 0xe05f,
58315811
"qla_target: Unable to send command to target, sending TERM EXCHANGE for rsp\n");
58325812
qlt_send_term_exchange(rsp->qpair, NULL,
5833-
atio, 1, 0);
5813+
atio, 1);
58345814
break;
58355815
case -EBUSY:
58365816
ql_dbg(ql_dbg_tgt, vha, 0xe060,
@@ -6720,7 +6700,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked)
67206700

67216701
adjust_corrupted_atio(pkt);
67226702
qlt_send_term_exchange(ha->base_qpair, NULL, pkt,
6723-
ha_locked, 0);
6703+
ha_locked);
67246704
} else {
67256705
qlt_24xx_atio_pkt_all_vps(vha,
67266706
(struct atio_from_isp *)pkt, ha_locked);

0 commit comments

Comments
 (0)