/*
 * Region sizes and offsets, all expressed in KB (see the layout diagram
 * that follows).  The data-exchange area starts right after the VBIOS
 * image, so its offset is defined in terms of the VBIOS size.
 */
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB		64
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB	AMD_SRIOV_MSG_VBIOS_SIZE_KB
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB	4
#define AMD_SRIOV_MSG_TMR_OFFSET_KB		2048
#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB		2
#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB		64
/*
 * layout
 * 0           64KB        65KB        66KB           68KB                   132KB
 * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
 * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
 */
40+
/*
 * Offsets (in KB) of the individual message regions.  Each offset is
 * derived from the previous region's offset plus that region's size, so
 * the regions are laid out back to back starting at the data-exchange
 * offset: PF2VF, VF2PF, bad page, RAS telemetry.
 */
#define AMD_SRIOV_MSG_SIZE_KB			1
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB		AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB		(AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB	(AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB	(AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
4246
4347/*
4448 * PF2VF history log:
@@ -86,30 +90,59 @@ enum amd_sriov_ucode_engine_id {
8690
/*
 * Feature flag word.  Each feature occupies a single bit in 'flags';
 * the same 32 bits can be read or written at once through 'all'.
 * The eleven named bits plus the 21 reserved bits fill the word exactly.
 */
union amd_sriov_msg_feature_flags {
	struct {
		uint32_t error_log_collect	: 1;
		uint32_t host_load_ucodes	: 1;
		uint32_t host_flr_vramlost	: 1;
		uint32_t mm_bw_management	: 1;
		uint32_t pp_one_vf_mode		: 1;
		uint32_t reg_indirect_acc	: 1;
		uint32_t av1_support		: 1;
		uint32_t vcn_rb_decouple	: 1;
		uint32_t mes_info_dump_enable	: 1;
		uint32_t ras_caps		: 1;
		uint32_t ras_telemetry		: 1;
		/* keeps the bit-field struct at exactly 32 bits */
		uint32_t reserved		: 21;
	} flags;
	uint32_t all;
};
102108
/*
 * Register-access flag word.  Three single-bit flags (IH, MMHUB, GC)
 * followed by 29 reserved bits; 'all' overlays the same 32 bits.
 */
union amd_sriov_reg_access_flags {
	struct {
		uint32_t vf_reg_access_ih	: 1;
		uint32_t vf_reg_access_mmhub	: 1;
		uint32_t vf_reg_access_gc	: 1;
		/* keeps the bit-field struct at exactly 32 bits */
		uint32_t reserved		: 29;
	} flags;
	uint32_t all;
};
112118
/*
 * RAS capability mask: one bit per block plus a poison propagation mode
 * bit, padded to 64 bits.  'all' overlays the same 64 bits.
 *
 * The block_* bits are declared in the same order as the values of
 * enum amd_sriov_ras_telemetry_gpu_block (UMC first, MPIO last).
 * struct amd_sriov_msg_pf2vf_info carries two instances of this union
 * (ras_en_caps and ras_telemetry_en_caps).
 */
union amd_sriov_ras_caps {
	struct {
		uint64_t block_umc			: 1;
		uint64_t block_sdma			: 1;
		uint64_t block_gfx			: 1;
		uint64_t block_mmhub			: 1;
		uint64_t block_athub			: 1;
		uint64_t block_pcie_bif			: 1;
		uint64_t block_hdp			: 1;
		uint64_t block_xgmi_wafl		: 1;
		uint64_t block_df			: 1;
		uint64_t block_smn			: 1;
		uint64_t block_sem			: 1;
		uint64_t block_mp0			: 1;
		uint64_t block_mp1			: 1;
		uint64_t block_fuse			: 1;
		uint64_t block_mca			: 1;
		uint64_t block_vcn			: 1;
		uint64_t block_jpeg			: 1;
		uint64_t block_ih			: 1;
		uint64_t block_mpio			: 1;
		/* field name spelling is kept as-is; it is part of the interface */
		uint64_t poison_propogation_mode	: 1;
		/* 20 named bits + 44 reserved = 64 */
		uint64_t reserved			: 44;
	} bits;
	uint64_t all;
};
145+
113146union amd_sriov_msg_os_info {
114147 struct {
115148 uint32_t windows : 1 ;
@@ -158,7 +191,7 @@ struct amd_sriov_msg_pf2vf_info_header {
158191 uint32_t reserved [2 ];
159192};
160193
/*
 * Subtracted from 256 to size the trailing reserved[] uint32_t array of
 * struct amd_sriov_msg_pf2vf_info; must be bumped whenever fields are
 * added to that struct.
 */
#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
162195struct amd_sriov_msg_pf2vf_info {
163196 /* header contains size and version */
164197 struct amd_sriov_msg_pf2vf_info_header header ;
@@ -211,6 +244,12 @@ struct amd_sriov_msg_pf2vf_info {
211244 uint32_t pcie_atomic_ops_support_flags ;
212245 /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
213246 uint32_t gpu_capacity ;
247+ /* vf bdf on host pci tree for debug only */
248+ uint32_t bdf_on_host ;
249+ uint32_t more_bp ; //Reserved for future use.
250+ union amd_sriov_ras_caps ras_en_caps ;
251+ union amd_sriov_ras_caps ras_telemetry_en_caps ;
252+
214253 /* reserved */
215254 uint32_t reserved [256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE ];
216255} __packed ;
@@ -283,8 +322,12 @@ enum amd_sriov_mailbox_request_message {
283322 MB_REQ_MSG_REL_GPU_FINI_ACCESS ,
284323 MB_REQ_MSG_REQ_GPU_RESET_ACCESS ,
285324 MB_REQ_MSG_REQ_GPU_INIT_DATA ,
325+ MB_REQ_MSG_PSP_VF_CMD_RELAY ,
286326
287327 MB_REQ_MSG_LOG_VF_ERROR = 200 ,
328+ MB_REQ_MSG_READY_TO_RESET = 201 ,
329+ MB_REQ_MSG_RAS_POISON = 202 ,
330+ MB_REQ_RAS_ERROR_COUNT = 203 ,
288331};
289332
290333/* mailbox message send from host to guest */
@@ -297,10 +340,60 @@ enum amd_sriov_mailbox_response_message {
297340 MB_RES_MSG_FAIL ,
298341 MB_RES_MSG_QUERY_ALIVE ,
299342 MB_RES_MSG_GPU_INIT_DATA_READY ,
343+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11 ,
300344
301345 MB_RES_MSG_TEXT_MESSAGE = 255
302346};
303347
/*
 * Index of each GPU block in the RAS telemetry data.  Values are spelled
 * out explicitly and RAS_TELEMETRY_GPU_BLOCK_COUNT (one past the last
 * block) is used as the length of the per-block array in
 * struct amd_sriov_ras_telemetry_error_count.
 */
enum amd_sriov_ras_telemetry_gpu_block {
	RAS_TELEMETRY_GPU_BLOCK_UMC		= 0,
	RAS_TELEMETRY_GPU_BLOCK_SDMA		= 1,
	RAS_TELEMETRY_GPU_BLOCK_GFX		= 2,
	RAS_TELEMETRY_GPU_BLOCK_MMHUB		= 3,
	RAS_TELEMETRY_GPU_BLOCK_ATHUB		= 4,
	RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF	= 5,
	RAS_TELEMETRY_GPU_BLOCK_HDP		= 6,
	RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL	= 7,
	RAS_TELEMETRY_GPU_BLOCK_DF		= 8,
	RAS_TELEMETRY_GPU_BLOCK_SMN		= 9,
	RAS_TELEMETRY_GPU_BLOCK_SEM		= 10,
	RAS_TELEMETRY_GPU_BLOCK_MP0		= 11,
	RAS_TELEMETRY_GPU_BLOCK_MP1		= 12,
	RAS_TELEMETRY_GPU_BLOCK_FUSE		= 13,
	RAS_TELEMETRY_GPU_BLOCK_MCA		= 14,
	RAS_TELEMETRY_GPU_BLOCK_VCN		= 15,
	RAS_TELEMETRY_GPU_BLOCK_JPEG		= 16,
	RAS_TELEMETRY_GPU_BLOCK_IH		= 17,
	RAS_TELEMETRY_GPU_BLOCK_MPIO		= 18,
	RAS_TELEMETRY_GPU_BLOCK_COUNT		= 19,
};
370+
/*
 * Header placed at the start of struct amdsriov_ras_telemetry:
 * four 32-bit words (checksum, used size, two reserved).
 * NOTE(review): the checksum algorithm and the unit of used_size are not
 * visible here -- confirm against the producer/consumer code.
 */
struct amd_sriov_ras_telemetry_header {
	uint32_t checksum;
	uint32_t used_size;
	uint32_t reserved[2];
};
376+
377+ struct amd_sriov_ras_telemetry_error_count {
378+ struct {
379+ uint32_t ce_count ;
380+ uint32_t ue_count ;
381+ uint32_t de_count ;
382+ uint32_t ce_overflow_count ;
383+ uint32_t ue_overflow_count ;
384+ uint32_t de_overflow_count ;
385+ uint32_t reserved [6 ];
386+ } block [RAS_TELEMETRY_GPU_BLOCK_COUNT ];
387+ };
388+
389+ struct amdsriov_ras_telemetry {
390+ struct amd_sriov_ras_telemetry_header header ;
391+
392+ union {
393+ struct amd_sriov_ras_telemetry_error_count error_count ;
394+ } body ;
395+ };
396+
304397/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
305398enum amd_sriov_gpu_init_data_version {
306399 GPU_INIT_DATA_READY_V1 = 1 ,
0 commit comments