|
23 | 23 |
|
24 | 24 | #include "amdgpu_ras.h" |
25 | 25 |
|
26 | | -static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, |
27 | | - void *ras_error_status, |
28 | | - struct amdgpu_iv_entry *entry) |
29 | | -{ |
30 | | - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); |
31 | | -} |
32 | | - |
33 | | -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) |
34 | | -{ |
35 | | - int r; |
36 | | - struct ras_fs_if fs_info = { |
37 | | - .sysfs_name = "umc_err_count", |
38 | | - }; |
39 | | - struct ras_ih_if ih_info = { |
40 | | - .cb = amdgpu_umc_process_ras_data_cb, |
41 | | - }; |
42 | | - |
43 | | - if (!adev->umc.ras_if) { |
44 | | - adev->umc.ras_if = |
45 | | - kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
46 | | - if (!adev->umc.ras_if) |
47 | | - return -ENOMEM; |
48 | | - adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; |
49 | | - adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
50 | | - adev->umc.ras_if->sub_block_index = 0; |
51 | | - } |
52 | | - ih_info.head = fs_info.head = *adev->umc.ras_if; |
53 | | - |
54 | | - r = amdgpu_ras_late_init(adev, adev->umc.ras_if, |
55 | | - &fs_info, &ih_info); |
56 | | - if (r) |
57 | | - goto free; |
58 | | - |
59 | | - if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { |
60 | | - r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); |
61 | | - if (r) |
62 | | - goto late_fini; |
63 | | - } else { |
64 | | - r = 0; |
65 | | - goto free; |
66 | | - } |
67 | | - |
68 | | - /* ras init of specific umc version */ |
69 | | - if (adev->umc.ras_funcs && |
70 | | - adev->umc.ras_funcs->err_cnt_init) |
71 | | - adev->umc.ras_funcs->err_cnt_init(adev); |
72 | | - |
73 | | - return 0; |
74 | | - |
75 | | -late_fini: |
76 | | - amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); |
77 | | -free: |
78 | | - kfree(adev->umc.ras_if); |
79 | | - adev->umc.ras_if = NULL; |
80 | | - return r; |
81 | | -} |
82 | | - |
83 | | -void amdgpu_umc_ras_fini(struct amdgpu_device *adev) |
84 | | -{ |
85 | | - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && |
86 | | - adev->umc.ras_if) { |
87 | | - struct ras_common_if *ras_if = adev->umc.ras_if; |
88 | | - struct ras_ih_if ih_info = { |
89 | | - .head = *ras_if, |
90 | | - .cb = amdgpu_umc_process_ras_data_cb, |
91 | | - }; |
92 | | - |
93 | | - amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
94 | | - kfree(ras_if); |
95 | | - } |
96 | | -} |
97 | | - |
98 | | -int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, |
| 26 | +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, |
99 | 27 | void *ras_error_status, |
100 | 28 | struct amdgpu_iv_entry *entry, |
101 | 29 | bool reset) |
@@ -180,6 +108,100 @@ int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, |
180 | 108 | return AMDGPU_RAS_SUCCESS; |
181 | 109 | } |
182 | 110 |
|
| 111 | +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, |
| 112 | + void *ras_error_status, |
| 113 | + bool reset) |
| 114 | +{ |
| 115 | + int ret; |
| 116 | + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; |
| 117 | + struct ras_common_if head = { |
| 118 | + .block = AMDGPU_RAS_BLOCK__UMC, |
| 119 | + }; |
| 120 | + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); |
| 121 | + |
| 122 | + ret = |
| 123 | + amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); |
| 124 | + |
| 125 | + if (ret == AMDGPU_RAS_SUCCESS && obj) { |
| 126 | + obj->err_data.ue_count += err_data->ue_count; |
| 127 | + obj->err_data.ce_count += err_data->ce_count; |
| 128 | + } |
| 129 | + |
| 130 | + return ret; |
| 131 | +} |
| 132 | + |
/*
 * RAS interrupt callback for the UMC block (installed as the .cb member
 * of struct ras_ih_if).  Hands the error record and IV entry to
 * amdgpu_umc_do_page_retirement() with its reset argument set to true
 * and returns that function's result.
 */
static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
		void *ras_error_status,
		struct amdgpu_iv_entry *entry)
{
	const bool reset = true;

	return amdgpu_umc_do_page_retirement(adev, ras_error_status,
					     entry, reset);
}
| 139 | + |
| 140 | +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) |
| 141 | +{ |
| 142 | + int r; |
| 143 | + struct ras_fs_if fs_info = { |
| 144 | + .sysfs_name = "umc_err_count", |
| 145 | + }; |
| 146 | + struct ras_ih_if ih_info = { |
| 147 | + .cb = amdgpu_umc_process_ras_data_cb, |
| 148 | + }; |
| 149 | + |
| 150 | + if (!adev->umc.ras_if) { |
| 151 | + adev->umc.ras_if = |
| 152 | + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
| 153 | + if (!adev->umc.ras_if) |
| 154 | + return -ENOMEM; |
| 155 | + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; |
| 156 | + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
| 157 | + adev->umc.ras_if->sub_block_index = 0; |
| 158 | + } |
| 159 | + ih_info.head = fs_info.head = *adev->umc.ras_if; |
| 160 | + |
| 161 | + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, |
| 162 | + &fs_info, &ih_info); |
| 163 | + if (r) |
| 164 | + goto free; |
| 165 | + |
| 166 | + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { |
| 167 | + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); |
| 168 | + if (r) |
| 169 | + goto late_fini; |
| 170 | + } else { |
| 171 | + r = 0; |
| 172 | + goto free; |
| 173 | + } |
| 174 | + |
| 175 | + /* ras init of specific umc version */ |
| 176 | + if (adev->umc.ras_funcs && |
| 177 | + adev->umc.ras_funcs->err_cnt_init) |
| 178 | + adev->umc.ras_funcs->err_cnt_init(adev); |
| 179 | + |
| 180 | + return 0; |
| 181 | + |
| 182 | +late_fini: |
| 183 | + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); |
| 184 | +free: |
| 185 | + kfree(adev->umc.ras_if); |
| 186 | + adev->umc.ras_if = NULL; |
| 187 | + return r; |
| 188 | +} |
| 189 | + |
| 190 | +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) |
| 191 | +{ |
| 192 | + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && |
| 193 | + adev->umc.ras_if) { |
| 194 | + struct ras_common_if *ras_if = adev->umc.ras_if; |
| 195 | + struct ras_ih_if ih_info = { |
| 196 | + .head = *ras_if, |
| 197 | + .cb = amdgpu_umc_process_ras_data_cb, |
| 198 | + }; |
| 199 | + |
| 200 | + amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
| 201 | + kfree(ras_if); |
| 202 | + } |
| 203 | +} |
| 204 | + |
183 | 205 | int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, |
184 | 206 | struct amdgpu_irq_src *source, |
185 | 207 | struct amdgpu_iv_entry *entry) |
|
0 commit comments