Skip to content

Commit 2cda9a0

Browse files
committed
Merge tag 'drm-intel-gt-next-2025-09-12' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next
Driver Changes:
- Include the GuC registers in the error state (Daniele)
- Use memdup_user() (Thorsten)
- Selftest improvements (Jonathan)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://lore.kernel.org/r/aMPCfRObHMg6DZAs@jlahtine-mobl
2 parents cf99b26 + ba391a1 commit 2cda9a0

6 files changed

Lines changed: 116 additions & 12 deletions

File tree

drivers/gpu/drm/i915/gem/i915_gem_context.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2158,18 +2158,12 @@ static int set_context_image(struct i915_gem_context *ctx,
21582158
goto out_ce;
21592159
}
21602160

2161-
state = kmalloc(ce->engine->context_size, GFP_KERNEL);
2162-
if (!state) {
2163-
ret = -ENOMEM;
2161+
state = memdup_user(u64_to_user_ptr(user.image), ce->engine->context_size);
2162+
if (IS_ERR(state)) {
2163+
ret = PTR_ERR(state);
21642164
goto out_ce;
21652165
}
21662166

2167-
if (copy_from_user(state, u64_to_user_ptr(user.image),
2168-
ce->engine->context_size)) {
2169-
ret = -EFAULT;
2170-
goto out_state;
2171-
}
2172-
21732167
shmem_state = shmem_create_from_data(ce->engine->name,
21742168
state, ce->engine->context_size);
21752169
if (IS_ERR(shmem_state)) {

drivers/gpu/drm/i915/gt/selftest_hangcheck.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -904,9 +904,7 @@ static void active_engine(struct kthread_work *work)
904904
arg->result = PTR_ERR(ce[count]);
905905
pr_err("[%s] Create context #%ld failed: %d!\n",
906906
engine->name, count, arg->result);
907-
if (!count)
908-
return;
909-
while (--count)
907+
while (count--)
910908
intel_context_put(ce[count]);
911909
return;
912910
}

drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ static void guc_prepare_xfer(struct intel_gt *gt)
4646
/* allows for 5us (in 10ns units) before GT can go to RC6 */
4747
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
4848
}
49+
50+
/*
51+
* Starting from IP 12.50 we need to enable the mirroring of GuC
52+
* internal state to debug registers. This is always enabled on previous
53+
* IPs.
54+
*/
55+
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50))
56+
intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG);
4957
}
5058

5159
static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,

drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
#define GUC_GEN10_SHIM_WC_ENABLE (1<<21)
9797

9898
#define GUC_SHIM_CONTROL2 _MMIO(0xc068)
99+
#define GUC_ENABLE_DEBUG_REG (1<<11)
99100
#define GUC_IS_PRIVILEGED (1<<29)
100101
#define GSC_LOADS_HUC (1<<30)
101102

drivers/gpu/drm/i915/i915_gpu_error.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,74 @@ static void err_print_guc_ctb(struct drm_i915_error_state_buf *m,
685685
ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size);
686686
}
687687

688+
/* This list includes registers that are useful in debugging GuC hangs. */
689+
const struct {
690+
u32 start;
691+
u32 count;
692+
} guc_hw_reg_state[] = {
693+
{ 0xc0b0, 2 },
694+
{ 0xc000, 65 },
695+
{ 0xc140, 1 },
696+
{ 0xc180, 16 },
697+
{ 0xc1dc, 10 },
698+
{ 0xc300, 79 },
699+
{ 0xc4b4, 47 },
700+
{ 0xc574, 1 },
701+
{ 0xc57c, 1 },
702+
{ 0xc584, 11 },
703+
{ 0xc5c0, 8 },
704+
{ 0xc5e4, 1 },
705+
{ 0xc5ec, 103 },
706+
{ 0xc7c0, 1 },
707+
{ 0xc0b0, 2 }
708+
};
709+
710+
/*
 * Print one line of register values to the error state buffer.
 *
 * @m:     error state buffer to print into
 * @start: register offset shown in the "[0x....]" prefix of the line
 * @dump:  values to print, starting with the one that belongs to @start
 * @count: number of values still pending for the current range
 *
 * A line holds 8, 4, 2 or 1 values; the widest width that does not exceed
 * @count is used (a single value is printed when @count is below 2).
 *
 * Returns the number of values consumed from @dump.
 */
static u32 print_range_line(struct drm_i915_error_state_buf *m, u32 start, u32 *dump, u32 count)
{
	u32 width;

	/* Pick the widest supported row that still fits in what is left. */
	if (count >= 8)
		width = 8;
	else if (count >= 4)
		width = 4;
	else if (count >= 2)
		width = 2;
	else
		width = 1;

	switch (width) {
	case 8:
		err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			   start, dump[0], dump[1], dump[2], dump[3],
			   dump[4], dump[5], dump[6], dump[7]);
		break;
	case 4:
		err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
			   start, dump[0], dump[1], dump[2], dump[3]);
		break;
	case 2:
		err_printf(m, "[0x%04x] 0x%08x 0x%08x\n", start, dump[0], dump[1]);
		break;
	default:
		err_printf(m, "[0x%04x] 0x%08x\n", start, dump[0]);
		break;
	}

	return width;
}
729+
730+
/*
 * Print the saved GuC register snapshot, one guc_hw_reg_state range after
 * another.
 *
 * @m:        error state buffer to print into
 * @hw_state: flat array of register values laid out in guc_hw_reg_state
 *            order, or NULL if no snapshot is available (nothing is printed)
 */
static void err_print_guc_hw_state(struct drm_i915_error_state_buf *m, u32 *hw_state)
{
	u32 *range_values = hw_state;
	int r;

	if (!hw_state)
		return;

	err_printf(m, "GuC Register State:\n");

	for (r = 0; r < ARRAY_SIZE(guc_hw_reg_state); r++) {
		const u32 base = guc_hw_reg_state[r].start;
		const u32 nregs = guc_hw_reg_state[r].count;
		u32 done;

		/* Each call prints one line and reports how many values it used. */
		for (done = 0; done < nregs; )
			done += print_range_line(m, base + done * sizeof(u32),
						 range_values + done, nregs - done);

		/* A line must never run past the end of its range. */
		GEM_BUG_ON(done != nregs);

		/* The next range's values follow directly in the snapshot. */
		range_values += done;
	}
}
755+
688756
static void err_print_uc(struct drm_i915_error_state_buf *m,
689757
const struct intel_uc_coredump *error_uc)
690758
{
@@ -693,6 +761,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
693761
intel_uc_fw_dump(&error_uc->guc_fw, &p);
694762
intel_uc_fw_dump(&error_uc->huc_fw, &p);
695763
err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp);
764+
err_print_guc_hw_state(m, error_uc->guc.hw_state);
696765
intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log);
697766
err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence);
698767
err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0);
@@ -1025,6 +1094,7 @@ static void cleanup_uc(struct intel_uc_coredump *uc)
10251094
kfree(uc->huc_fw.file_wanted.path);
10261095
i915_vma_coredump_free(uc->guc.vma_log);
10271096
i915_vma_coredump_free(uc->guc.vma_ctb);
1097+
kfree(uc->guc.hw_state);
10281098

10291099
kfree(uc);
10301100
}
@@ -1721,6 +1791,37 @@ static void gt_record_guc_ctb(struct intel_ctb_coredump *saved,
17211791
saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr;
17221792
}
17231793

1794+
static u32 read_guc_state_reg(struct intel_uncore *uncore, int range, int count)
1795+
{
1796+
GEM_BUG_ON(range >= ARRAY_SIZE(guc_hw_reg_state));
1797+
GEM_BUG_ON(count >= guc_hw_reg_state[range].count);
1798+
1799+
return intel_uncore_read(uncore,
1800+
_MMIO(guc_hw_reg_state[range].start + count * sizeof(u32)));
1801+
}
1802+
1803+
static void gt_record_guc_hw_state(struct intel_uncore *uncore,
1804+
struct intel_uc_coredump *error_uc)
1805+
{
1806+
u32 *hw_state;
1807+
u32 count = 0;
1808+
int i, j;
1809+
1810+
for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++)
1811+
count += guc_hw_reg_state[i].count;
1812+
1813+
hw_state = kcalloc(count, sizeof(u32), ALLOW_FAIL);
1814+
if (!hw_state)
1815+
return;
1816+
1817+
count = 0;
1818+
for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++)
1819+
for (j = 0; j < guc_hw_reg_state[i].count; j++)
1820+
hw_state[count++] = read_guc_state_reg(uncore, i, j);
1821+
1822+
error_uc->guc.hw_state = hw_state;
1823+
}
1824+
17241825
static struct intel_uc_coredump *
17251826
gt_record_uc(struct intel_gt_coredump *gt,
17261827
struct i915_vma_compress *compress)
@@ -1755,6 +1856,7 @@ gt_record_uc(struct intel_gt_coredump *gt,
17551856
uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
17561857
gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv,
17571858
uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
1859+
gt_record_guc_hw_state(gt->_gt->uncore, error_uc);
17581860

17591861
return error_uc;
17601862
}

drivers/gpu/drm/i915/i915_gpu_error.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ struct intel_gt_coredump {
177177
struct intel_ctb_coredump ctb[2];
178178
struct i915_vma_coredump *vma_ctb;
179179
struct i915_vma_coredump *vma_log;
180+
u32 *hw_state;
180181
u32 timestamp;
181182
u16 last_fence;
182183
bool is_guc_capture;

0 commit comments

Comments
 (0)