Skip to content

Commit 288a932

Browse files
akhilpo-qcom authored and Rob Clark committed
drm/msm/adreno: Introduce A8x GPU Support
A8x is the next generation of Adreno GPUs, featuring a significant hardware design change. A major update to the design is the introduction of the Slice architecture. Slices are essentially mini-GPUs within the GPU that process graphics and compute workloads more independently. Also, in addition to the BV and BR pipes we saw in A7x, the CP has more concurrency with additional pipes. From a software interface perspective, these changes have a significant impact on the KMD side. First, the GPU register space has been extensively reorganized. Second, to avoid a register space explosion caused by the new slice architecture and additional pipes, many registers are now virtualized instead of duplicated as in A7x. The KMD must configure an aperture register with the appropriate slice and pipe ID before accessing these virtualized registers. Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com> Patchwork: https://patchwork.freedesktop.org/patch/689019/ Message-ID: <20251118-kaana-gpu-support-v4-14-86eeb8e93fb6@oss.qualcomm.com> Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
1 parent 06cfbca commit 288a932

7 files changed

Lines changed: 1321 additions & 34 deletions

File tree

drivers/gpu/drm/msm/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ adreno-y := \
2424
adreno/a6xx_gmu.o \
2525
adreno/a6xx_hfi.o \
2626
adreno/a6xx_preempt.o \
27+
adreno/a8xx_gpu.o \
2728

2829
adreno-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \
2930

drivers/gpu/drm/msm/adreno/a6xx_gmu.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,9 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
11741174
return ret;
11751175
}
11761176

1177+
/* Read the slice info on A8x GPUs */
1178+
a8xx_gpu_get_slice_info(gpu);
1179+
11771180
/* Set the bus quota to a reasonable value for boot */
11781181
a6xx_gmu_set_initial_bw(gpu, gmu);
11791182

drivers/gpu/drm/msm/adreno/a6xx_gpu.c

Lines changed: 85 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
157157
}
158158
}
159159

160-
static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
160+
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
161161
{
162162
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
163163
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
@@ -245,14 +245,21 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
245245
}
246246

247247
if (!sysprof) {
248-
if (!adreno_is_a7xx(adreno_gpu)) {
248+
if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
249249
/* Turn off protected mode to write to special registers */
250250
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
251251
OUT_RING(ring, 0);
252252
}
253253

254-
OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
255-
OUT_RING(ring, 1);
254+
if (adreno_is_a8xx(adreno_gpu)) {
255+
OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
256+
OUT_RING(ring, 1);
257+
OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
258+
OUT_RING(ring, 1);
259+
} else {
260+
OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
261+
OUT_RING(ring, 1);
262+
}
256263
}
257264

258265
/* Execute the table update */
@@ -281,7 +288,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
281288
* to make sure BV doesn't race ahead while BR is still switching
282289
* pagetables.
283290
*/
284-
if (adreno_is_a7xx(&a6xx_gpu->base)) {
291+
if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
285292
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
286293
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
287294
}
@@ -295,20 +302,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
295302
OUT_RING(ring, CACHE_INVALIDATE);
296303

297304
if (!sysprof) {
305+
u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
306+
REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
307+
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
298308
/*
299309
* Wait for SRAM clear after the pgtable update, so the
300310
* two can happen in parallel:
301311
*/
302312
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
303313
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
304-
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(
305-
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
314+
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
306315
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
307316
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
308317
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
309318
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
310319

311-
if (!adreno_is_a7xx(adreno_gpu)) {
320+
if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
312321
/* Re-enable protected mode: */
313322
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
314323
OUT_RING(ring, 1);
@@ -446,6 +455,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
446455
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
447456
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
448457
struct msm_ringbuffer *ring = submit->ring;
458+
u32 rbbm_perfctr_cp0, cp_always_on_counter;
449459
unsigned int i, ibs = 0;
450460

451461
adreno_check_and_reenable_stall(adreno_gpu);
@@ -466,10 +476,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
466476
if (gpu->nr_rings > 1)
467477
a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
468478

469-
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
470-
rbmemptr_stats(ring, index, cpcycles_start));
471-
get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
472-
rbmemptr_stats(ring, index, alwayson_start));
479+
if (adreno_is_a8xx(adreno_gpu)) {
480+
rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
481+
cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
482+
} else {
483+
rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
484+
cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
485+
}
486+
487+
get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
488+
get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));
473489

474490
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
475491
OUT_RING(ring, CP_SET_THREAD_BOTH);
@@ -516,14 +532,17 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
516532
OUT_RING(ring, 0x00e); /* IB1LIST end */
517533
}
518534

519-
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
520-
rbmemptr_stats(ring, index, cpcycles_end));
521-
get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
522-
rbmemptr_stats(ring, index, alwayson_end));
535+
get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
536+
get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));
523537

524538
/* Write the fence to the scratch register */
525-
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
526-
OUT_RING(ring, submit->seqno);
539+
if (adreno_is_a8xx(adreno_gpu)) {
540+
OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
541+
OUT_RING(ring, submit->seqno);
542+
} else {
543+
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
544+
OUT_RING(ring, submit->seqno);
545+
}
527546

528547
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
529548
OUT_RING(ring, CP_SET_THREAD_BR);
@@ -723,8 +742,11 @@ static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
723742
/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
724743
*cfg = *common_cfg;
725744

726-
cfg->ubwc_swizzle = 0x6;
727-
cfg->highest_bank_bit = 15;
745+
/* Use common config as is for A8x */
746+
if (!adreno_is_a8xx(gpu)) {
747+
cfg->ubwc_swizzle = 0x6;
748+
cfg->highest_bank_bit = 15;
749+
}
728750

729751
if (adreno_is_a610(gpu)) {
730752
cfg->highest_bank_bit = 13;
@@ -1013,7 +1035,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
10131035
return false;
10141036

10151037
/* A7xx is safe! */
1016-
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
1038+
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
10171039
return true;
10181040

10191041
/*
@@ -1127,7 +1149,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
11271149
return 0;
11281150
}
11291151

1130-
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
1152+
int a6xx_zap_shader_init(struct msm_gpu *gpu)
11311153
{
11321154
static bool loaded;
11331155
int ret;
@@ -2089,7 +2111,7 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
20892111
u32 fuse_val;
20902112
int ret;
20912113

2092-
if (adreno_is_a750(adreno_gpu)) {
2114+
if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
20932115
/*
20942116
* Assume that if qcom scm isn't available, that whatever
20952117
* replacement allows writing the fuse register ourselves.
@@ -2115,9 +2137,9 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
21152137
return ret;
21162138

21172139
/*
2118-
* On a750 raytracing may be disabled by the firmware, find out
2119-
* whether that's the case. The scm call above sets the fuse
2120-
* register.
2140+
* On A7XX_GEN3 and newer, raytracing may be disabled by the
2141+
* firmware, find out whether that's the case. The scm call
2142+
* above sets the fuse register.
21212143
*/
21222144
fuse_val = a6xx_llc_read(a6xx_gpu,
21232145
REG_A7XX_CX_MISC_SW_FUSE_VALUE);
@@ -2178,7 +2200,7 @@ void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_
21782200
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
21792201
{
21802202
/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
2181-
if (adreno_is_a610(to_adreno_gpu(gpu)))
2203+
if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu)))
21822204
return;
21832205

21842206
gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
@@ -2209,7 +2231,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
22092231

22102232
msm_devfreq_resume(gpu);
22112233

2212-
adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
2234+
if (adreno_is_a8xx(adreno_gpu))
2235+
a8xx_llc_activate(a6xx_gpu);
2236+
else if (adreno_is_a7xx(adreno_gpu))
2237+
a7xx_llc_activate(a6xx_gpu);
2238+
else
2239+
a6xx_llc_activate(a6xx_gpu);
22132240

22142241
return ret;
22152242
}
@@ -2589,10 +2616,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
25892616
adreno_gpu->base.hw_apriv =
25902617
!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
25912618

2592-
/* gpu->info only gets assigned in adreno_gpu_init() */
2593-
is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
2594-
config->info->family == ADRENO_7XX_GEN2 ||
2595-
config->info->family == ADRENO_7XX_GEN3;
2619+
/* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */
2620+
is_a7xx = config->info->family >= ADRENO_7XX_GEN1;
25962621

25972622
a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
25982623

@@ -2630,7 +2655,7 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
26302655
return ERR_PTR(ret);
26312656
}
26322657

2633-
if (adreno_is_a7xx(adreno_gpu)) {
2658+
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
26342659
ret = a7xx_cx_mem_init(a6xx_gpu);
26352660
if (ret) {
26362661
a6xx_destroy(&(a6xx_gpu->base.base));
@@ -2754,3 +2779,30 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = {
27542779
.bus_halt = a6xx_bus_clear_pending_transactions,
27552780
.mmu_fault_handler = a6xx_fault_handler,
27562781
};
2782+
2783+
const struct adreno_gpu_funcs a8xx_gpu_funcs = {
2784+
.base = {
2785+
.get_param = adreno_get_param,
2786+
.set_param = adreno_set_param,
2787+
.hw_init = a8xx_hw_init,
2788+
.ucode_load = a6xx_ucode_load,
2789+
.pm_suspend = a6xx_gmu_pm_suspend,
2790+
.pm_resume = a6xx_gmu_pm_resume,
2791+
.recover = a8xx_recover,
2792+
.submit = a7xx_submit,
2793+
.active_ring = a6xx_active_ring,
2794+
.irq = a8xx_irq,
2795+
.destroy = a6xx_destroy,
2796+
.gpu_busy = a8xx_gpu_busy,
2797+
.gpu_get_freq = a6xx_gmu_get_freq,
2798+
.gpu_set_freq = a6xx_gpu_set_freq,
2799+
.create_vm = a6xx_create_vm,
2800+
.create_private_vm = a6xx_create_private_vm,
2801+
.get_rptr = a6xx_get_rptr,
2802+
.progress = a8xx_progress,
2803+
},
2804+
.init = a6xx_gpu_init,
2805+
.get_timestamp = a8xx_gmu_get_timestamp,
2806+
.bus_halt = a8xx_bus_clear_pending_transactions,
2807+
.mmu_fault_handler = a8xx_fault_handler,
2808+
};

drivers/gpu/drm/msm/adreno/a6xx_gpu.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct a6xx_info {
4646
const struct adreno_protect *protect;
4747
const struct adreno_reglist_list *pwrup_reglist;
4848
const struct adreno_reglist_list *ifpc_reglist;
49+
const struct adreno_reglist_pipe *nonctxt_reglist;
50+
u32 max_slices;
4951
u32 gmu_chipid;
5052
u32 gmu_cgc_mode;
5153
u32 prim_fifo_threshold;
@@ -101,6 +103,11 @@ struct a6xx_gpu {
101103
void *htw_llc_slice;
102104
bool have_mmu500;
103105
bool hung;
106+
107+
u32 cached_aperture;
108+
spinlock_t aperture_lock;
109+
110+
u32 slice_mask;
104111
};
105112

106113
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
@@ -302,5 +309,19 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
302309
void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
303310
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
304311
int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
305-
312+
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
313+
int a6xx_zap_shader_init(struct msm_gpu *gpu);
314+
315+
void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
316+
int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data);
317+
void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
318+
int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value);
319+
u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate);
320+
int a8xx_gpu_feature_probe(struct msm_gpu *gpu);
321+
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu);
322+
int a8xx_hw_init(struct msm_gpu *gpu);
323+
irqreturn_t a8xx_irq(struct msm_gpu *gpu);
324+
void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu);
325+
bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
326+
void a8xx_recover(struct msm_gpu *gpu);
306327
#endif /* __A6XX_GPU_H__ */

0 commit comments

Comments
 (0)