|
13 | 13 | #include "intel_guc_ct.h" |
14 | 14 | #include "intel_guc_print.h" |
15 | 15 |
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
/*
 * Reasons for declaring the GuC CT channel dead. Each enumerator is a bit
 * position in ct->dead_ct_reason, so several reasons can accumulate before
 * the worker gets to report them. CT_DEAD_ALIVE (bit 0) doubles as the
 * "nothing recorded" reset value assigned when the CT is (re-)enabled.
 */
enum {
	CT_DEAD_ALIVE = 0,
	CT_DEAD_SETUP,
	CT_DEAD_WRITE,
	CT_DEAD_DEADLOCK,
	CT_DEAD_H2G_HAS_ROOM,
	CT_DEAD_READ,
	CT_DEAD_PROCESS_FAILED,
};

static void ct_dead_ct_worker_func(struct work_struct *w);

/*
 * Record a fatal CT failure and kick the worker that reports it. OR-ing a
 * reason bit and queueing work keeps this usable from atomic context; the
 * actual (sleepable) debug dump happens in ct_dead_ct_worker_func(). Once
 * dead_ct_reported is set, further failures are ignored until the CT is
 * re-enabled, so only the first failure episode is captured.
 */
#define CT_DEAD(ct, reason) \
	do { \
		if (!(ct)->dead_ct_reported) { \
			(ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \
			queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \
		} \
	} while (0)
#else
/* Debug support compiled out: CT_DEAD() is a no-op. */
#define CT_DEAD(ct, reason) do { } while (0)
#endif
| 39 | + |
16 | 40 | static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) |
17 | 41 | { |
18 | 42 | return container_of(ct, struct intel_guc, ct); |
@@ -93,6 +117,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) |
93 | 117 | spin_lock_init(&ct->requests.lock); |
94 | 118 | INIT_LIST_HEAD(&ct->requests.pending); |
95 | 119 | INIT_LIST_HEAD(&ct->requests.incoming); |
| 120 | +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) |
| 121 | + INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); |
| 122 | +#endif |
96 | 123 | INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); |
97 | 124 | tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); |
98 | 125 | init_waitqueue_head(&ct->wq); |
@@ -319,11 +346,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) |
319 | 346 |
|
320 | 347 | ct->enabled = true; |
321 | 348 | ct->stall_time = KTIME_MAX; |
| 349 | +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) |
| 350 | + ct->dead_ct_reported = false; |
| 351 | + ct->dead_ct_reason = CT_DEAD_ALIVE; |
| 352 | +#endif |
322 | 353 |
|
323 | 354 | return 0; |
324 | 355 |
|
325 | 356 | err_out: |
326 | 357 | CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); |
| 358 | + CT_DEAD(ct, SETUP); |
327 | 359 | return err; |
328 | 360 | } |
329 | 361 |
|
@@ -434,6 +466,7 @@ static int ct_write(struct intel_guc_ct *ct, |
434 | 466 | corrupted: |
435 | 467 | CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", |
436 | 468 | desc->head, desc->tail, desc->status); |
| 469 | + CT_DEAD(ct, WRITE); |
437 | 470 | ctb->broken = true; |
438 | 471 | return -EPIPE; |
439 | 472 | } |
@@ -504,6 +537,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) |
504 | 537 | CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head); |
505 | 538 | CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail); |
506 | 539 |
|
| 540 | + CT_DEAD(ct, DEADLOCK); |
507 | 541 | ct->ctbs.send.broken = true; |
508 | 542 | } |
509 | 543 |
|
@@ -552,6 +586,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) |
552 | 586 | head, ctb->size); |
553 | 587 | desc->status |= GUC_CTB_STATUS_OVERFLOW; |
554 | 588 | ctb->broken = true; |
| 589 | + CT_DEAD(ct, H2G_HAS_ROOM); |
555 | 590 | return false; |
556 | 591 | } |
557 | 592 |
|
@@ -914,6 +949,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) |
914 | 949 | CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", |
915 | 950 | desc->head, desc->tail, desc->status); |
916 | 951 | ctb->broken = true; |
| 952 | + CT_DEAD(ct, READ); |
917 | 953 | return -EPIPE; |
918 | 954 | } |
919 | 955 |
|
@@ -1063,6 +1099,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) |
1063 | 1099 | if (unlikely(err)) { |
1064 | 1100 | CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", |
1065 | 1101 | ERR_PTR(err), 4 * request->size, request->msg); |
| 1102 | + CT_DEAD(ct, PROCESS_FAILED); |
1066 | 1103 | ct_free_msg(request); |
1067 | 1104 | } |
1068 | 1105 |
|
@@ -1239,3 +1276,19 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, |
1239 | 1276 | drm_printf(p, "Tail: %u\n", |
1240 | 1277 | ct->ctbs.recv.desc->tail); |
1241 | 1278 | } |
| 1279 | + |
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
/*
 * Deferred handler for CT_DEAD(): runs on system_unbound_wq so it may sleep
 * while dumping state. Logs the accumulated reason bitmask and triggers an
 * error capture, at most once per dead-CT episode (dead_ct_reported is only
 * cleared again when the CT channel is re-enabled in intel_guc_ct_enable()).
 */
static void ct_dead_ct_worker_func(struct work_struct *w)
{
	struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker);
	struct intel_guc *guc = ct_to_guc(ct);

	/* Already reported this episode; nothing more to do. */
	if (ct->dead_ct_reported)
		return;

	ct->dead_ct_reported = true;

	guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason);
	/* All-ones engine mask: capture state for every engine. */
	intel_klog_error_capture(guc_to_gt(guc), (intel_engine_mask_t)~0U);
}
#endif
0 commit comments