Skip to content

Commit 414ecb0

Browse files
Georgi Djakov authored and willdeacon committed
iommu/arm-smmu-qcom-debug: Add support for TBUs
Operating the TBUs (Translation Buffer Units) from Linux on Qualcomm platforms can help with debugging context faults. To help with that, the TBUs can run ATOS (Address Translation Operations) to manually trigger address translation of IOVA to physical address in hardware and provide more details when a context fault happens. The driver will control the resources needed by the TBU to allow running the debug operations such as ATOS, check for outstanding transactions, do snapshot capture etc. Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com> Link: https://lore.kernel.org/r/20240417133731.2055383-3-quic_c_gdjako@quicinc.com Signed-off-by: Will Deacon <will@kernel.org>
1 parent 54a75d8 commit 414ecb0

4 files changed

Lines changed: 365 additions & 4 deletions

File tree

drivers/iommu/Kconfig

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,14 @@ config ARM_SMMU_QCOM_DEBUG
379379
depends on ARM_SMMU_QCOM
380380
help
381381
Support for implementation specific debug features in ARM SMMU
382-
hardware found in QTI platforms.
383-
384-
Say Y here to enable debug for issues such as TLB sync timeouts
385-
which requires implementation defined register dumps.
382+
hardware found in QTI platforms. This includes support for
383+
the Translation Buffer Units (TBU) that can be used to obtain
384+
additional information when debugging memory management issues
385+
like context faults.
386+
387+
Say Y here to enable debug for issues such as context faults
388+
or TLB sync timeouts which require implementation-defined
389+
register dumps.
386390

387391
config ARM_SMMU_V3
388392
tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"

drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,66 @@
11
// SPDX-License-Identifier: GPL-2.0-only
22
/*
33
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
4+
* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
45
*/
56

7+
#include <linux/cleanup.h>
68
#include <linux/device.h>
9+
#include <linux/interconnect.h>
710
#include <linux/firmware/qcom/qcom_scm.h>
11+
#include <linux/iopoll.h>
12+
#include <linux/list.h>
13+
#include <linux/mod_devicetable.h>
14+
#include <linux/mutex.h>
15+
#include <linux/platform_device.h>
816
#include <linux/ratelimit.h>
17+
#include <linux/spinlock.h>
918

1019
#include "arm-smmu.h"
1120
#include "arm-smmu-qcom.h"
1221

22+
/*
 * TBU debug register offsets (*_REG, added to tbu->base) and the bit fields
 * accessed within those registers by the code in this file.
 */
#define TBU_DBG_TIMEOUT_US 100 /* microseconds; bounds the halt-ack poll and each ATOS wait loop */
23+
#define DEBUG_AXUSER_REG 0x30
24+
#define DEBUG_AXUSER_CDMID GENMASK_ULL(43, 36)
25+
#define DEBUG_AXUSER_CDMID_VAL 0xff /* value written to the CDMID field before triggering an ATOS */
26+
#define DEBUG_PAR_REG 0x28 /* read back after an ATOS to obtain the result */
27+
#define DEBUG_PAR_FAULT_VAL BIT(0) /* set when the ATOS lookup faulted */
28+
#define DEBUG_PAR_PA GENMASK_ULL(47, 12) /* translation result field */
29+
#define DEBUG_SID_HALT_REG 0x0 /* holds both the halt request bit and the ATOS stream ID */
30+
#define DEBUG_SID_HALT_VAL BIT(16) /* halt request */
31+
#define DEBUG_SID_HALT_SID GENMASK(9, 0) /* stream ID used for the ATOS lookup */
32+
#define DEBUG_SR_HALT_ACK_REG 0x20
33+
#define DEBUG_SR_HALT_ACK_VAL BIT(1) /* polled after a halt request */
34+
#define DEBUG_SR_ECATS_RUNNING_VAL BIT(0) /* polled until clear while an ATOS is in flight */
35+
#define DEBUG_TXN_AXCACHE GENMASK(5, 2)
36+
#define DEBUG_TXN_AXPROT GENMASK(8, 6)
37+
#define DEBUG_TXN_AXPROT_PRIV 0x1
38+
#define DEBUG_TXN_AXPROT_NSEC 0x2
39+
#define DEBUG_TXN_TRIGG_REG 0x18
40+
#define DEBUG_TXN_TRIGGER BIT(0) /* written together with the AXCACHE/AXPROT attributes to start an ATOS */
41+
#define DEBUG_VA_ADDR_REG 0x8 /* IOVA to translate */
42+
43+
/* TBUs registered by qcom_tbu_probe(); searched by qcom_find_tbu(). */
static LIST_HEAD(tbu_list);
44+
/* Held while adding to or walking tbu_list. */
static DEFINE_MUTEX(tbu_list_lock);
45+
/* Held around the ATOS retry loop so only one ATOS operation runs at a time. */
static DEFINE_SPINLOCK(atos_lock);
46+
47+
/* Per-TBU state; one instance is allocated and registered by qcom_tbu_probe(). */
struct qcom_tbu {
48+
struct device *dev; /* struct device of the TBU platform device */
49+
struct device_node *smmu_np; /* node named by "qcom,stream-id-range"; compared with the SMMU's of_node */
50+
u32 sid_range[2]; /* [0] = first stream ID, [1] = number of stream IDs served */
51+
struct list_head list; /* link in tbu_list */
52+
struct clk *clk; /* optional clock, enabled for the duration of an ATOS lookup */
53+
struct icc_path *path; /* interconnect path, bandwidth is requested for the duration of an ATOS lookup */
54+
void __iomem *base; /* mapping of the TBU register region (resource 0) */
55+
spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
56+
int halt_count; /* number of outstanding qcom_tbu_halt() calls; 0 means not halted */
57+
};
58+
59+
static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
60+
{
61+
return container_of(smmu, struct qcom_smmu, smmu);
62+
}
63+
1364
void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
1465
{
1566
int ret;
@@ -49,3 +100,305 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
49100
tbu_pwr_status, sync_inv_ack, sync_inv_progress);
50101
}
51102
}
103+
104+
/*
 * Look up the TBU that serves stream ID @sid on the SMMU @qsmmu.
 *
 * A TBU matches when it references this SMMU's device-tree node and @sid lies
 * in [sid_range[0], sid_range[0] + sid_range[1]).
 *
 * Returns the matching TBU, or NULL when none is registered at all (silently)
 * or none matches (after logging an error).
 */
static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
	struct qcom_tbu *entry;

	guard(mutex)(&tbu_list_lock);

	if (list_empty(&tbu_list))
		return NULL;

	list_for_each_entry(entry, &tbu_list, list) {
		u32 first = entry->sid_range[0];
		u32 limit = first + entry->sid_range[1];

		/* Skip TBUs that belong to a different SMMU instance. */
		if (entry->smmu_np != qsmmu->smmu.dev->of_node)
			continue;

		if (sid < first || sid >= limit)
			continue;

		return entry;
	}

	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);
	return NULL;
}
126+
127+
static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
128+
{
129+
struct arm_smmu_device *smmu = smmu_domain->smmu;
130+
int ret = 0, idx = smmu_domain->cfg.cbndx;
131+
u32 val, fsr, status;
132+
133+
guard(spinlock_irqsave)(&tbu->halt_lock);
134+
if (tbu->halt_count) {
135+
tbu->halt_count++;
136+
return ret;
137+
}
138+
139+
val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
140+
val |= DEBUG_SID_HALT_VAL;
141+
writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
142+
143+
fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
144+
if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_FSR_SS)) {
145+
u32 sctlr_orig, sctlr;
146+
147+
/*
148+
* We are in a fault. Our request to halt the bus will not
149+
* complete until transactions in front of us (such as the fault
150+
* itself) have completed. Disable iommu faults and terminate
151+
* any existing transactions.
152+
*/
153+
sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
154+
sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
155+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
156+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
157+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
158+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
159+
}
160+
161+
if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
162+
(status & DEBUG_SR_HALT_ACK_VAL),
163+
0, TBU_DBG_TIMEOUT_US)) {
164+
dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
165+
ret = -ETIMEDOUT;
166+
167+
val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
168+
val &= ~DEBUG_SID_HALT_VAL;
169+
writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
170+
171+
return ret;
172+
}
173+
174+
tbu->halt_count = 1;
175+
176+
return ret;
177+
}
178+
179+
static void qcom_tbu_resume(struct qcom_tbu *tbu)
180+
{
181+
u32 val;
182+
183+
guard(spinlock_irqsave)(&tbu->halt_lock);
184+
if (!tbu->halt_count) {
185+
WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
186+
return;
187+
}
188+
189+
if (tbu->halt_count > 1) {
190+
tbu->halt_count--;
191+
return;
192+
}
193+
194+
val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
195+
val &= ~DEBUG_SID_HALT_VAL;
196+
writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
197+
198+
tbu->halt_count = 0;
199+
}
200+
201+
/*
 * Run a single ATOS lookup of @iova for stream ID @sid on @tbu.
 *
 * Programs the stream ID, address and transaction attributes, triggers the
 * lookup and busy-waits for up to TBU_DBG_TIMEOUT_US until it stops running,
 * reports a fault, or times out. The debug registers are reset afterwards.
 * @smmu_domain is not used here.
 *
 * Returns the PA field of the result register, or 0 if the lookup faulted or
 * timed out.
 */
static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
	void __iomem *regs = tbu->base;
	phys_addr_t result = 0;
	bool timed_out = false;
	ktime_t deadline;
	u64 reg;

	/* Set address and stream-id */
	reg = readq_relaxed(regs + DEBUG_SID_HALT_REG);
	reg &= ~DEBUG_SID_HALT_SID;
	reg |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
	writeq_relaxed(reg, regs + DEBUG_SID_HALT_REG);
	writeq_relaxed(iova, regs + DEBUG_VA_ADDR_REG);
	reg = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
	writeq_relaxed(reg, regs + DEBUG_AXUSER_REG);

	/*
	 * Transaction attributes: write-back read and write-allocate,
	 * non-secure, privileged access; then pull the trigger.
	 */
	reg = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf) |
	      FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC) |
	      FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV) |
	      DEBUG_TXN_TRIGGER;
	writeq_relaxed(reg, regs + DEBUG_TXN_TRIGG_REG);

	/* Spin until the lookup is no longer running, faults, or times out. */
	deadline = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
	while (readl_relaxed(regs + DEBUG_SR_HALT_ACK_REG) & DEBUG_SR_ECATS_RUNNING_VAL) {
		if (readl_relaxed(regs + DEBUG_PAR_REG) & DEBUG_PAR_FAULT_VAL)
			break;

		if (ktime_compare(ktime_get(), deadline) > 0) {
			timed_out = true;
			break;
		}
	}

	reg = readq_relaxed(regs + DEBUG_PAR_REG);
	if (reg & DEBUG_PAR_FAULT_VAL) {
		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
			reg, sid);
	} else if (timed_out) {
		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
	} else {
		/*
		 * NOTE(review): FIELD_GET() presumably right-justifies the
		 * field, i.e. this is PAR[47:12] shifted down rather than a
		 * byte address - confirm that is what callers expect.
		 */
		result = FIELD_GET(DEBUG_PAR_PA, reg);
	}

	/* Reset hardware */
	writeq_relaxed(0, regs + DEBUG_TXN_TRIGG_REG);
	writeq_relaxed(0, regs + DEBUG_VA_ADDR_REG);
	reg = readl_relaxed(regs + DEBUG_SID_HALT_REG);
	reg &= ~DEBUG_SID_HALT_SID;
	writel_relaxed(reg, regs + DEBUG_SID_HALT_REG);

	return result;
}
262+
263+
static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
264+
dma_addr_t iova, u32 sid)
265+
{
266+
struct arm_smmu_device *smmu = smmu_domain->smmu;
267+
struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
268+
int idx = smmu_domain->cfg.cbndx;
269+
struct qcom_tbu *tbu;
270+
u32 sctlr_orig, sctlr;
271+
phys_addr_t phys = 0;
272+
int attempt = 0;
273+
int ret;
274+
u64 fsr;
275+
276+
tbu = qcom_find_tbu(qsmmu, sid);
277+
if (!tbu)
278+
return 0;
279+
280+
ret = icc_set_bw(tbu->path, 0, UINT_MAX);
281+
if (ret)
282+
return ret;
283+
284+
ret = clk_prepare_enable(tbu->clk);
285+
if (ret)
286+
goto disable_icc;
287+
288+
ret = qcom_tbu_halt(tbu, smmu_domain);
289+
if (ret)
290+
goto disable_clk;
291+
292+
/*
293+
* ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
294+
* and check for an interrupt manually.
295+
*/
296+
sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
297+
sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
298+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
299+
300+
fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
301+
if (fsr & ARM_SMMU_FSR_FAULT) {
302+
/* Clear pending interrupts */
303+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
304+
305+
/*
306+
* TBU halt takes care of resuming any stalled transcation.
307+
* Kept it here for completeness sake.
308+
*/
309+
if (fsr & ARM_SMMU_FSR_SS)
310+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
311+
ARM_SMMU_RESUME_TERMINATE);
312+
}
313+
314+
/* Only one concurrent atos operation */
315+
scoped_guard(spinlock_irqsave, &atos_lock) {
316+
/*
317+
* If the translation fails, attempt the lookup more time."
318+
*/
319+
do {
320+
phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);
321+
322+
fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
323+
if (fsr & ARM_SMMU_FSR_FAULT) {
324+
/* Clear pending interrupts */
325+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
326+
327+
if (fsr & ARM_SMMU_FSR_SS)
328+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
329+
ARM_SMMU_RESUME_TERMINATE);
330+
}
331+
} while (!phys && attempt++ < 2);
332+
333+
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
334+
}
335+
qcom_tbu_resume(tbu);
336+
337+
/* Read to complete prior write transcations */
338+
readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
339+
340+
disable_clk:
341+
clk_disable_unprepare(tbu->clk);
342+
disable_icc:
343+
icc_set_bw(tbu->path, 0, 0);
344+
345+
return phys;
346+
}
347+
348+
static int qcom_tbu_probe(struct platform_device *pdev)
349+
{
350+
struct of_phandle_args args = { .args_count = 2 };
351+
struct device_node *np = pdev->dev.of_node;
352+
struct device *dev = &pdev->dev;
353+
struct qcom_tbu *tbu;
354+
355+
tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
356+
if (!tbu)
357+
return -ENOMEM;
358+
359+
tbu->dev = dev;
360+
INIT_LIST_HEAD(&tbu->list);
361+
spin_lock_init(&tbu->halt_lock);
362+
363+
if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
364+
dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
365+
return -EINVAL;
366+
}
367+
368+
tbu->smmu_np = args.np;
369+
tbu->sid_range[0] = args.args[0];
370+
tbu->sid_range[1] = args.args[1];
371+
of_node_put(args.np);
372+
373+
tbu->base = devm_of_iomap(dev, np, 0, NULL);
374+
if (IS_ERR(tbu->base))
375+
return PTR_ERR(tbu->base);
376+
377+
tbu->clk = devm_clk_get_optional(dev, NULL);
378+
if (IS_ERR(tbu->clk))
379+
return PTR_ERR(tbu->clk);
380+
381+
tbu->path = devm_of_icc_get(dev, NULL);
382+
if (IS_ERR(tbu->path))
383+
return PTR_ERR(tbu->path);
384+
385+
guard(mutex)(&tbu_list_lock);
386+
list_add_tail(&tbu->list, &tbu_list);
387+
388+
return 0;
389+
}
390+
391+
/* Device-tree compatibles handled by the TBU driver; the empty entry terminates the table. */
static const struct of_device_id qcom_tbu_of_match[] = {
392+
{ .compatible = "qcom,sc7280-tbu" },
393+
{ .compatible = "qcom,sdm845-tbu" },
394+
{ }
395+
};
396+
397+
/* Platform driver for the TBU devices; only a probe callback is provided. */
static struct platform_driver qcom_tbu_driver = {
398+
.driver = {
399+
.name = "qcom_tbu",
400+
.of_match_table = qcom_tbu_of_match,
401+
},
402+
.probe = qcom_tbu_probe,
403+
};
404+
/* Registered as a built-in driver (no module exit path is defined here). */
builtin_platform_driver(qcom_tbu_driver);

0 commit comments

Comments
 (0)