2222#include "xe_gt_mcr.h"
2323#include "xe_gt_printk.h"
2424#include "xe_guc.h"
25+ #include "xe_guc_ads.h"
2526#include "xe_guc_capture.h"
2627#include "xe_guc_capture_types.h"
2728#include "xe_guc_ct.h"
@@ -669,6 +670,85 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
669670 return PAGE_ALIGN (total_size );
670671}
671672
673+ static int guc_capture_output_size_est (struct xe_guc * guc )
674+ {
675+ struct xe_gt * gt = guc_to_gt (guc );
676+ struct xe_hw_engine * hwe ;
677+ enum xe_hw_engine_id id ;
678+
679+ int capture_size = 0 ;
680+ size_t tmp = 0 ;
681+
682+ if (!guc -> capture )
683+ return - ENODEV ;
684+
685+ /*
686+ * If every single engine-instance suffered a failure in quick succession but
687+ * were all unrelated, then a burst of multiple error-capture events would dump
688+ * registers for every one engine instance, one at a time. In this case, GuC
689+ * would even dump the global-registers repeatedly.
690+ *
691+ * For each engine instance, there would be 1 x guc_state_capture_group_t output
692+ * followed by 3 x guc_state_capture_t lists. The latter is how the register
693+ * dumps are split across different register types (where the '3' are global vs class
694+ * vs instance).
695+ */
696+ for_each_hw_engine (hwe , gt , id ) {
697+ enum guc_capture_list_class_type capture_class ;
698+
699+ capture_class = xe_engine_class_to_guc_capture_class (hwe -> class );
700+ capture_size += sizeof (struct guc_state_capture_group_header_t ) +
701+ (3 * sizeof (struct guc_state_capture_header_t ));
702+
703+ if (!guc_capture_getlistsize (guc , 0 , GUC_STATE_CAPTURE_TYPE_GLOBAL ,
704+ 0 , & tmp , true))
705+ capture_size += tmp ;
706+ if (!guc_capture_getlistsize (guc , 0 , GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS ,
707+ capture_class , & tmp , true))
708+ capture_size += tmp ;
709+ if (!guc_capture_getlistsize (guc , 0 , GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE ,
710+ capture_class , & tmp , true))
711+ capture_size += tmp ;
712+ }
713+
714+ return capture_size ;
715+ }
716+
717+ /*
718+ * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
719+ * before the Xe can read the data out and process it
720+ */
721+ #define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
722+
723+ static void check_guc_capture_size (struct xe_guc * guc )
724+ {
725+ int capture_size = guc_capture_output_size_est (guc );
726+ int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER ;
727+ u32 buffer_size = xe_guc_log_section_size_capture (& guc -> log );
728+
729+ /*
730+ * NOTE: capture_size is much smaller than the capture region
731+ * allocation (DG2: <80K vs 1MB).
732+ * Additionally, its based on space needed to fit all engines getting
733+ * reset at once within the same G2H handler task slot. This is very
734+ * unlikely. However, if GuC really does run out of space for whatever
735+ * reason, we will see an separate warning message when processing the
736+ * G2H event capture-notification, search for:
737+ * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
738+ */
739+ if (capture_size < 0 )
740+ xe_gt_dbg (guc_to_gt (guc ),
741+ "Failed to calculate error state capture buffer minimum size: %d!\n" ,
742+ capture_size );
743+ if (capture_size > buffer_size )
744+ xe_gt_dbg (guc_to_gt (guc ), "Error state capture buffer maybe small: %d < %d\n" ,
745+ buffer_size , capture_size );
746+ else if (spare_size > buffer_size )
747+ xe_gt_dbg (guc_to_gt (guc ),
748+ "Error state capture buffer lacks spare size: %d < %d (min = %d)\n" ,
749+ buffer_size , spare_size , capture_size );
750+ }
751+
672752/*
673753 * xe_guc_capture_steered_list_init - Init steering register list
674754 * @guc: The GuC object
@@ -684,9 +764,10 @@ void xe_guc_capture_steered_list_init(struct xe_guc *guc)
684764 * the end of the pre-populated render list.
685765 */
686766 guc_capture_alloc_steered_lists (guc );
767+ check_guc_capture_size (guc );
687768}
688769
689- /**
770+ /*
690771 * xe_guc_capture_init - Init for GuC register capture
691772 * @guc: The GuC object
692773 *
0 commit comments