Skip to content

Commit e368d38

Browse files
Yihang Li authored and martinkpetersen committed
scsi: hisi_sas: Exit suspend state when usage count is greater than 0
When the host controller is in the suspended state, enabling a local PHY just after disabling all local PHYs in an expander environment causes the following hang: [ 486.854655] INFO: task kworker/u256:1:899 blocked for more than 120 seconds. [ 486.862207] Not tainted 6.1.0-rc4+ #1 [ 486.870545] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 486.878893] task:kworker/u256:1 state:D stack:0 pid:899 ppid:2 flags:0x00000008 [ 486.887745] Workqueue: 0000:74:02.0_disco_q sas_discover_domain [libsas] [ 486.894704] Call trace: [ 486.897400] __switch_to+0xf0/0x170 [ 486.901146] __schedule+0x3e4/0x1160 [ 486.904970] schedule+0x64/0x104 [ 486.908442] rpm_resume+0x158/0x6a0 [ 486.912163] __pm_runtime_resume+0x5c/0x84 [ 486.916489] smp_execute_task_sg+0x1f8/0x264 [libsas] [ 486.921773] sas_discover_expander.part.0+0xbc/0x720 [libsas] [ 486.927750] sas_discover_root_expander+0x90/0x154 [libsas] [ 486.933552] sas_discover_domain+0x444/0x6d0 [libsas] [ 486.938826] process_one_work+0x1e0/0x450 [ 486.943057] worker_thread+0x150/0x44c [ 486.947015] kthread+0x114/0x120 [ 486.950447] ret_from_fork+0x10/0x20 [ 486.954292] INFO: task kworker/u256:2:1780 blocked for more than 120 seconds. [ 486.961637] Not tainted 6.1.0-rc4+ #1 [ 486.966087] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 486.974356] task:kworker/u256:2 state:D stack:0 pid:1780 ppid:2 flags:0x00000208 [ 486.983141] Workqueue: 0000:74:02.0_event_q sas_port_event_worker [libsas] [ 486.990252] Call trace: [ 486.992930] __switch_to+0xf0/0x170 [ 486.996645] __schedule+0x3e4/0x1160 [ 487.000439] schedule+0x64/0x104 [ 487.003886] schedule_timeout+0x17c/0x1c0 [ 487.008102] wait_for_completion+0x7c/0x160 [ 487.012488] __flush_workqueue+0x104/0x3e0 [ 487.016782] sas_porte_bytes_dmaed+0x414/0x454 [libsas] [ 487.022203] sas_port_event_worker+0x38/0x60 [libsas] [ 487.027449] process_one_work+0x1e0/0x450 [ 487.031645] worker_thread+0x150/0x44c [ 487.035594] kthread+0x114/0x120 [ 487.039017] ret_from_fork+0x10/0x20 [ 487.042828] INFO: task bash:11488 blocked for more than 121 seconds. [ 487.049366] Not tainted 6.1.0-rc4+ #1 [ 487.053746] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 487.061953] task:bash state:D stack:0 pid:11488 ppid:10977 flags:0x00000204 [ 487.070698] Call trace: [ 487.073355] __switch_to+0xf0/0x170 [ 487.077050] __schedule+0x3e4/0x1160 [ 487.080833] schedule+0x64/0x104 [ 487.084270] schedule_timeout+0x17c/0x1c0 [ 487.088474] wait_for_completion+0x7c/0x160 [ 487.092851] __flush_workqueue+0x104/0x3e0 [ 487.097137] drain_workqueue+0xb8/0x160 [ 487.101159] __sas_drain_work+0x50/0x90 [libsas] [ 487.105963] sas_suspend_ha+0x64/0xd4 [libsas] [ 487.110590] suspend_v3_hw+0x198/0x1e8 [hisi_sas_v3_hw] [ 487.115989] pci_pm_runtime_suspend+0x5c/0x1d0 [ 487.120606] __rpm_callback+0x50/0x150 [ 487.124535] rpm_callback+0x74/0x80 [ 487.128204] rpm_suspend+0x110/0x640 [ 487.131955] rpm_idle+0x1f4/0x2d0 [ 487.135447] __pm_runtime_idle+0x58/0x94 [ 487.139538] queue_phy_enable+0xcc/0xf0 [libsas] [ 487.144330] store_sas_phy_enable+0x74/0x100 [ 487.148770] dev_attr_store+0x20/0x34 [ 487.152606] sysfs_kf_write+0x4c/0x5c [ 487.156437] kernfs_fop_write_iter+0x120/0x1b0 [ 487.161049] vfs_write+0x2d0/0x36c [ 487.164625] ksys_write+0x70/0x100 [ 487.168194] 
__arm64_sys_write+0x24/0x30 [ 487.172280] invoke_syscall+0x50/0x120 [ 487.176186] el0_svc_common.constprop.0+0x168/0x190 [ 487.181214] do_el0_svc+0x34/0xc0 [ 487.184680] el0_svc+0x2c/0xb4 [ 487.187879] el0t_64_sync_handler+0xb8/0xbc [ 487.192205] el0t_64_sync+0x19c/0x1a0

We find that when all local PHYs are disabled, all the devices are removed and the ->runtime_suspend() callback suspend_v3_hw() executes directly, since the controller usage count drops to 0. On the other side, the first local PHY is enabled through the sysfs interface, and phy_up_v3_hw() is guaranteed to complete because of suspend_v3_hw() -> interrupt_disable_v3_hw(). In the expander scenario, sas_discover_root_expander() is executed in the event work DISCE_DISCOVER_DOMAIN, which increases the controller usage count, carries out a resume and sends SMP IO; this cannot complete because the runtime PM status of the controller is RPM_SUSPENDING. At the same time, the ->runtime_suspend() callback suspend_v3_hw() also cannot complete because it drains the libsas event queue in sas_suspend_ha(), so a hang occurs.

(thread 1)                               | (thread 2)
...                                      |
rpm_idle()                               |
...                                      |
__update_runtime_status(RPM_SUSPENDING)  |
...                                      | ...
suspend_v3_hw()                          | smp_execute_task_sg()
...                                      | ...
interrupt_disable_v3_hw()                | pm_runtime_get_sync()
                                         | ...
...                                      | rpm_resume() //RPM_SUSPENDING
__sas_drain_work()                       |

To fix this, after interrupt_disable_v3_hw() check whether the current runtime PM status of the controller still allows the suspend to continue, and return immediately if not.

Signed-off-by: Yihang Li <liyihang9@huawei.com>
Signed-off-by: Xiang Chen <chenxiang66@hislicon.com>
Link: https://lore.kernel.org/r/1679283265-115066-5-git-send-email-chenxiang66@hisilicon.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent 89954f0 commit e368d38

1 file changed

Lines changed: 56 additions & 17 deletions

File tree

drivers/scsi/hisi_sas/hisi_sas_v3_hw.c

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,27 @@ static u32 hisi_sas_phy_read32(struct hisi_hba *hisi_hba,
604604
readl_poll_timeout_atomic(regs, val, cond, delay_us, timeout_us);\
605605
})
606606

607+
/*
 * interrupt_enable_v3_hw - program the interrupt mask registers of the HBA.
 * @hisi_hba: controller whose queue_count and n_phy bound the two loops.
 *
 * New helper added by this commit. It gathers the mask-register writes that
 * this same diff removes from init_reg_v3_hw(), so that the error path of
 * _suspend_v3_hw() can call it again after interrupt_disable_v3_hw().
 * Returns nothing; every step is a register write.
 */
static void interrupt_enable_v3_hw(struct hisi_hba *hisi_hba)
608+
{
609+
int i;
610+
611+
/* One mask register per queue, laid out 4 bytes apart from OQ0_INT_SRC_MSK. */
for (i = 0; i < hisi_hba->queue_count; i++)
612+
hisi_sas_write32(hisi_hba, OQ0_INT_SRC_MSK + 0x4 * i, 0);
613+
614+
/*
 * Controller-wide event and ECC interrupt masks.
 * NOTE(review): the per-bit meaning of these constants is not visible
 * here; presumably a set bit keeps a source masked - confirm against the
 * register definitions.
 */
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK1, 0xfefefefe);
615+
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK2, 0xfefefefe);
616+
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK3, 0xffc220ff);
617+
hisi_sas_write32(hisi_hba, SAS_ECC_INTR_MSK, 0x155555);
618+
619+
/* Per-PHY mask registers, written for every PHY index below n_phy. */
for (i = 0; i < hisi_hba->n_phy; i++) {
620+
hisi_sas_phy_write32(hisi_hba, i, CHL_INT1_MSK, 0xf2057fff);
621+
hisi_sas_phy_write32(hisi_hba, i, CHL_INT2_MSK, 0xffffbfe);
622+
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_NOT_RDY_MSK, 0x0);
623+
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_PHY_ENA_MSK, 0x0);
624+
hisi_sas_phy_write32(hisi_hba, i, SL_RX_BCAST_CHK_MSK, 0x0);
625+
}
626+
}
627+
607628
static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
608629
{
609630
int i, j;
@@ -624,20 +645,14 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
624645
hisi_sas_write32(hisi_hba, ENT_INT_SRC1, 0xffffffff);
625646
hisi_sas_write32(hisi_hba, ENT_INT_SRC2, 0xffffffff);
626647
hisi_sas_write32(hisi_hba, ENT_INT_SRC3, 0xffffffff);
627-
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK1, 0xfefefefe);
628-
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK2, 0xfefefefe);
629-
hisi_sas_write32(hisi_hba, ENT_INT_SRC_MSK3, 0xffc220ff);
630648
hisi_sas_write32(hisi_hba, CHNL_PHYUPDOWN_INT_MSK, 0x0);
631649
hisi_sas_write32(hisi_hba, CHNL_ENT_INT_MSK, 0x0);
632650
hisi_sas_write32(hisi_hba, HGC_COM_INT_MSK, 0x0);
633-
hisi_sas_write32(hisi_hba, SAS_ECC_INTR_MSK, 0x155555);
634651
hisi_sas_write32(hisi_hba, AWQOS_AWCACHE_CFG, 0xf0f0);
635652
hisi_sas_write32(hisi_hba, ARQOS_ARCACHE_CFG, 0xf0f0);
636-
for (i = 0; i < hisi_hba->queue_count; i++)
637-
hisi_sas_write32(hisi_hba, OQ0_INT_SRC_MSK + 0x4 * i, 0);
638-
639653
hisi_sas_write32(hisi_hba, HYPER_STREAM_ID_EN_CFG, 1);
640654

655+
interrupt_enable_v3_hw(hisi_hba);
641656
for (i = 0; i < hisi_hba->n_phy; i++) {
642657
enum sas_linkrate max;
643658
struct hisi_sas_phy *phy = &hisi_hba->phy[i];
@@ -660,13 +675,8 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
660675
hisi_sas_phy_write32(hisi_hba, i, CHL_INT1, 0xffffffff);
661676
hisi_sas_phy_write32(hisi_hba, i, CHL_INT2, 0xffffffff);
662677
hisi_sas_phy_write32(hisi_hba, i, RXOP_CHECK_CFG_H, 0x1000);
663-
hisi_sas_phy_write32(hisi_hba, i, CHL_INT1_MSK, 0xf2057fff);
664-
hisi_sas_phy_write32(hisi_hba, i, CHL_INT2_MSK, 0xffffbfe);
665678
hisi_sas_phy_write32(hisi_hba, i, PHY_CTRL_RDY_MSK, 0x0);
666-
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_NOT_RDY_MSK, 0x0);
667679
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_DWS_RESET_MSK, 0x0);
668-
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_PHY_ENA_MSK, 0x0);
669-
hisi_sas_phy_write32(hisi_hba, i, SL_RX_BCAST_CHK_MSK, 0x0);
670680
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_OOB_RESTART_MSK, 0x1);
671681
hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7f7a120);
672682
hisi_sas_phy_write32(hisi_hba, i, CON_CFG_DRIVER, 0x2a0a01);
@@ -2662,7 +2672,6 @@ static int disable_host_v3_hw(struct hisi_hba *hisi_hba)
26622672
u32 status, reg_val;
26632673
int rc;
26642674

2665-
interrupt_disable_v3_hw(hisi_hba);
26662675
hisi_sas_sync_poll_cqs(hisi_hba);
26672676
hisi_sas_write32(hisi_hba, DLVRY_QUEUE_ENABLE, 0x0);
26682677

@@ -2693,6 +2702,7 @@ static int soft_reset_v3_hw(struct hisi_hba *hisi_hba)
26932702
struct device *dev = hisi_hba->dev;
26942703
int rc;
26952704

2705+
interrupt_disable_v3_hw(hisi_hba);
26962706
rc = disable_host_v3_hw(hisi_hba);
26972707
if (rc) {
26982708
dev_err(dev, "soft reset: disable host failed rc=%d\n", rc);
@@ -5061,6 +5071,7 @@ static void hisi_sas_reset_prepare_v3_hw(struct pci_dev *pdev)
50615071
set_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags);
50625072
hisi_sas_controller_reset_prepare(hisi_hba);
50635073

5074+
interrupt_disable_v3_hw(hisi_hba);
50645075
rc = disable_host_v3_hw(hisi_hba);
50655076
if (rc)
50665077
dev_err(dev, "FLR: disable host failed rc=%d\n", rc);
@@ -5090,6 +5101,21 @@ enum {
50905101
hip08,
50915102
};
50925103

5104+
/*
 * enable_host_v3_hw - bring the host back up after a failed disable.
 * @hisi_hba: controller to re-enable.
 *
 * New helper added by this commit. _suspend_v3_hw() calls it from its
 * err_out_recover_host label when disable_host_v3_hw() returns an error.
 * Returns nothing.
 */
static void enable_host_v3_hw(struct hisi_hba *hisi_hba)
5105+
{
5106+
u32 reg_val;
5107+
5108+
/*
 * Write a bitmask with the low queue_count bits set. The shift is done
 * on a 64-bit 1ULL and only then narrowed to u32.
 */
hisi_sas_write32(hisi_hba, DLVRY_QUEUE_ENABLE,
5109+
(u32)((1ULL << hisi_hba->queue_count) - 1));
5110+
5111+
phys_init_v3_hw(hisi_hba);
5112+
/* Read-modify-write: clear only the shutdown-request bits in AM_CTRL_GLOBAL. */
reg_val = hisi_sas_read32(hisi_hba, AXI_MASTER_CFG_BASE +
5113+
AM_CTRL_GLOBAL);
5114+
reg_val &= ~AM_CTRL_SHUTDOWN_REQ_MSK;
5115+
hisi_sas_write32(hisi_hba, AXI_MASTER_CFG_BASE +
5116+
AM_CTRL_GLOBAL, reg_val);
5117+
}
5118+
50935119
static int _suspend_v3_hw(struct device *device)
50945120
{
50955121
struct pci_dev *pdev = to_pci_dev(device);
@@ -5112,14 +5138,18 @@ static int _suspend_v3_hw(struct device *device)
51125138
scsi_block_requests(shost);
51135139
set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
51145140
flush_workqueue(hisi_hba->wq);
5141+
interrupt_disable_v3_hw(hisi_hba);
5142+
5143+
if (atomic_read(&device->power.usage_count)) {
5144+
dev_err(dev, "PM suspend: host status cannot be suspended\n");
5145+
rc = -EBUSY;
5146+
goto err_out;
5147+
}
51155148

51165149
rc = disable_host_v3_hw(hisi_hba);
51175150
if (rc) {
51185151
dev_err(dev, "PM suspend: disable host failed rc=%d\n", rc);
5119-
clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
5120-
clear_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags);
5121-
scsi_unblock_requests(shost);
5122-
return rc;
5152+
goto err_out_recover_host;
51235153
}
51245154

51255155
hisi_sas_init_mem(hisi_hba);
@@ -5130,6 +5160,15 @@ static int _suspend_v3_hw(struct device *device)
51305160

51315161
dev_warn(dev, "end of suspending controller\n");
51325162
return 0;
5163+
5164+
err_out_recover_host:
5165+
enable_host_v3_hw(hisi_hba);
5166+
err_out:
5167+
interrupt_enable_v3_hw(hisi_hba);
5168+
clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
5169+
clear_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags);
5170+
scsi_unblock_requests(shost);
5171+
return rc;
51335172
}
51345173

51355174
static int _resume_v3_hw(struct device *device)

0 commit comments

Comments
 (0)