Skip to content

Commit 77dbd7c

Browse files
Gangliang Xie authored and alexdeucher committed
drm/amd/pm: implement ras_smu_drv interface for smu v13.0.12
Implement the ras_smu_drv interface for smu v13.0.12.

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 0c6f09e commit 77dbd7c

3 files changed

Lines changed: 156 additions & 0 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,32 @@ struct ras_critical_region {
503503
uint64_t size;
504504
};
505505

506+
struct ras_eeprom_table_version { /* RAS EEPROM table version, stored as two 16-bit bitfields of one uint32_t */
507+
uint32_t minor : 16; /* minor version; declared first (presumably bits [15:0] on the targeted ABIs - TODO confirm) */
508+
uint32_t major : 16; /* major version; declared second */
509+
};
510+
511+
struct ras_eeprom_smu_funcs { /* callbacks for RAS EEPROM access through the SMU; every callback returns an int status */
512+
int (*get_ras_table_version)(struct amdgpu_device *adev, /* reports the RAS table version ... */
513+
uint32_t *table_version); /* ... through this 32-bit out-parameter */
514+
int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout); /* bad page count into *count; the smu v13.0.12 implementation treats timeout as milliseconds */
515+
int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr); /* 64-bit MCA address of bad page entry 'index' */
516+
int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp); /* NOTE(review): the smu v13.0.12 implementation forwards only the low 32 bits */
517+
int (*get_timestamp)(struct amdgpu_device *adev, /* timestamp of entry 'index' ... */
518+
uint16_t index, uint64_t *timestamp); /* ... through this 64-bit out-parameter */
519+
int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid); /* 64-bit IPID of bad page entry 'index' */
520+
int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result); /* erase request; *result receives the message read-back value in the smu v13.0.12 implementation */
521+
};
522+
523+
enum ras_smu_feature_flags { /* bit values; presumably for the uint64_t mask filled in by ras_smu_drv.ras_smu_feature_flags() - TODO confirm against callers */
524+
RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0), /* bit 0; by its name, the RAS EEPROM feature (consumers are not shown here) */
525+
};
526+
527+
struct ras_smu_drv { /* per-SMU RAS driver descriptor: EEPROM callback table plus a feature-flag query */
528+
const struct ras_eeprom_smu_funcs *smu_eeprom_funcs; /* read-only table of EEPROM access callbacks */
529+
void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags); /* writes a 64-bit feature mask to *flags; no return value */
530+
};
531+
506532
struct amdgpu_ras {
507533
void *ras_mgr;
508534
/* ras infrastructure */

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "amdgpu_fru_eeprom.h"
3535
#include <linux/pci.h>
3636
#include "smu_cmn.h"
37+
#include "amdgpu_ras.h"
3738

3839
#undef MP1_Public
3940
#undef smnMP1_FIRMWARE_FLAGS
@@ -925,3 +926,131 @@ const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
925926
.temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
926927
.get_temp_metrics = smu_v13_0_12_get_temp_metrics,
927928
};
929+
930+
static int smu_v13_0_12_get_ras_table_version(struct amdgpu_device *adev,
931+
uint32_t *table_version)
932+
{
933+
struct smu_context *smu = adev->powerplay.pp_handle;
934+
935+
return smu_cmn_send_smc_msg_with_param(smu,
936+
SMU_MSG_GetRASTableVersion, 0, table_version);
937+
}
938+
939+
static int smu_v13_0_12_get_badpage_count(struct amdgpu_device *adev, uint32_t *count,
940+
uint32_t timeout)
941+
{
942+
struct smu_context *smu = adev->powerplay.pp_handle;
943+
uint64_t end, now;
944+
int ret = 0;
945+
946+
now = (uint64_t)ktime_to_ms(ktime_get());
947+
end = now + timeout;
948+
do {
949+
ret = smu_cmn_send_smc_msg_with_param(smu,
950+
SMU_MSG_GetBadPageCount, 0, count);
951+
/* eeprom is not ready */
952+
if (ret != -EBUSY)
953+
return ret;
954+
mdelay(10);
955+
now = (uint64_t)ktime_to_ms(ktime_get());
956+
} while (now < end);
957+
958+
return ret;
959+
}
960+
961+
static int smu_v13_0_12_set_timestamp(struct amdgpu_device *adev, uint64_t timestamp)
962+
{
963+
struct smu_context *smu = adev->powerplay.pp_handle;
964+
965+
return smu_cmn_send_smc_msg_with_param(smu,
966+
SMU_MSG_SetTimestamp, (uint32_t)timestamp, 0);
967+
}
968+
969+
static int smu_v13_0_12_get_timestamp(struct amdgpu_device *adev,
970+
uint16_t index, uint64_t *timestamp)
971+
{
972+
struct smu_context *smu = adev->powerplay.pp_handle;
973+
uint32_t temp;
974+
int ret;
975+
976+
ret = smu_cmn_send_smc_msg_with_param(smu,
977+
SMU_MSG_GetTimestamp, index, &temp);
978+
if (!ret)
979+
*timestamp = temp;
980+
981+
return ret;
982+
}
983+
984+
static int smu_v13_0_12_get_badpage_ipid(struct amdgpu_device *adev,
985+
uint16_t index, uint64_t *ipid)
986+
{
987+
struct smu_context *smu = adev->powerplay.pp_handle;
988+
uint32_t temp_arg, temp_ipid_lo, temp_ipid_high;
989+
int ret;
990+
991+
temp_arg = index | (1 << 16);
992+
ret = smu_cmn_send_smc_msg_with_param(smu,
993+
SMU_MSG_GetBadPageIpid, temp_arg, &temp_ipid_lo);
994+
if (ret)
995+
return ret;
996+
997+
temp_arg = index | (2 << 16);
998+
ret = smu_cmn_send_smc_msg_with_param(smu,
999+
SMU_MSG_GetBadPageIpid, temp_arg, &temp_ipid_high);
1000+
if (!ret)
1001+
*ipid = (uint64_t)temp_ipid_high << 32 | temp_ipid_lo;
1002+
return ret;
1003+
}
1004+
1005+
static int smu_v13_0_12_erase_ras_table(struct amdgpu_device *adev,
1006+
uint32_t *result)
1007+
{
1008+
struct smu_context *smu = adev->powerplay.pp_handle;
1009+
1010+
return smu_cmn_send_smc_msg_with_param(smu,
1011+
SMU_MSG_EraseRasTable, 0, result);
1012+
}
1013+
1014+
static int smu_v13_0_12_get_badpage_mca_addr(struct amdgpu_device *adev,
1015+
uint16_t index, uint64_t *mca_addr)
1016+
{
1017+
struct smu_context *smu = adev->powerplay.pp_handle;
1018+
uint32_t temp_arg, temp_addr_lo, temp_addr_high;
1019+
int ret;
1020+
1021+
temp_arg = index | (1 << 16);
1022+
ret = smu_cmn_send_smc_msg_with_param(smu,
1023+
SMU_MSG_GetBadPageMcaAddr, temp_arg, &temp_addr_lo);
1024+
if (ret)
1025+
return ret;
1026+
1027+
temp_arg = index | (2 << 16);
1028+
ret = smu_cmn_send_smc_msg_with_param(smu,
1029+
SMU_MSG_GetBadPageMcaAddr, temp_arg, &temp_addr_high);
1030+
if (!ret)
1031+
*mca_addr = (uint64_t)temp_addr_high << 32 | temp_addr_lo;
1032+
return ret;
1033+
}
1034+
1035+
static const struct ras_eeprom_smu_funcs smu_v13_0_12_eeprom_smu_funcs = { /* smu v13.0.12 bindings for every ras_eeprom_smu_funcs callback */
1036+
.get_ras_table_version = smu_v13_0_12_get_ras_table_version, /* sends SMU_MSG_GetRASTableVersion */
1037+
.get_badpage_count = smu_v13_0_12_get_badpage_count, /* sends SMU_MSG_GetBadPageCount, retrying on -EBUSY */
1038+
.get_badpage_mca_addr = smu_v13_0_12_get_badpage_mca_addr, /* two SMU_MSG_GetBadPageMcaAddr requests, low then high half */
1039+
.set_timestamp = smu_v13_0_12_set_timestamp, /* sends SMU_MSG_SetTimestamp */
1040+
.get_timestamp = smu_v13_0_12_get_timestamp, /* sends SMU_MSG_GetTimestamp */
1041+
.get_badpage_ipid = smu_v13_0_12_get_badpage_ipid, /* two SMU_MSG_GetBadPageIpid requests, low then high half */
1042+
.erase_ras_table = smu_v13_0_12_erase_ras_table, /* sends SMU_MSG_EraseRasTable */
1043+
};
1044+
1045+
/**
 * smu_v13_0_12_ras_smu_feature_flags - report the RAS SMU feature mask
 * @adev: amdgpu device (not used by this implementation)
 * @flags: receives the mask; a NULL pointer is tolerated and ignored
 *
 * This implementation always writes an empty mask (0), i.e. it sets no
 * feature bits.
 */
static void smu_v13_0_12_ras_smu_feature_flags(struct amdgpu_device *adev, uint64_t *flags)
{
	if (flags)
		*flags = 0ULL;
}
1052+
1053+
const struct ras_smu_drv smu_v13_0_12_ras_smu_drv = { /* non-static: declared extern in smu_v13_0_6_ppt.h for use outside this file */
1054+
.smu_eeprom_funcs = &smu_v13_0_12_eeprom_smu_funcs, /* EEPROM callback table defined in this file */
1055+
.ras_smu_feature_flags = smu_v13_0_12_ras_smu_feature_flags, /* currently writes an empty feature mask */
1056+
};

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ int smu_v13_0_12_get_npm_data(struct smu_context *smu,
105105
extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
106106
extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
107107
extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
108+
extern const struct ras_smu_drv smu_v13_0_12_ras_smu_drv;
108109

109110
#if defined(SWSMU_CODE_LAYER_L2)
110111
#include "smu_cmn.h"

0 commit comments

Comments
 (0)