Skip to content

Commit 5350355

Browse files
yifancom authored and alexdeucher committed
amd/amdkfd: correct mem limit calculation for small APUs
Current mem limit check leaks some GTT memory (reserved_for_pt + reserved_for_ras + adev->vram_pin_size) for small APUs. Since carveout VRAM is tunable on APUs, there are three cases regarding the carveout VRAM size relative to GTT: 1. 0 < carveout < gtt: apu_prefer_gtt = true, is_app_apu = false; 2. carveout > gtt / 2: apu_prefer_gtt = false, is_app_apu = false; 3. 0 = carveout: apu_prefer_gtt = true, is_app_apu = true. It doesn't make sense to check the limitation below in case 1 (default case, small carveout) because the values in the expression mix carveout and gtt quantities: adev->kfd.vram_used[xcp_id] + vram_needed > vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size). gtt: kfd.vram_used, vram_needed, vram_size; carveout: reserved_for_pt, reserved_for_ras, adev->vram_pin_size. In case 1, vram allocation will go to the gtt domain, so skip the vram check since the ttm_mem_limit check already covers this allocation. Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit fa7c99f)
1 parent ce42a3b commit 5350355

1 file changed

Lines changed: 32 additions & 12 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
213213
spin_lock(&kfd_mem_limit.mem_limit_lock);
214214

215215
if (kfd_mem_limit.system_mem_used + system_mem_needed >
216-
kfd_mem_limit.max_system_mem_limit)
216+
kfd_mem_limit.max_system_mem_limit) {
217217
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
218+
if (!no_system_mem_limit) {
219+
ret = -ENOMEM;
220+
goto release;
221+
}
222+
}
218223

219-
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
220-
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
221-
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
222-
kfd_mem_limit.max_ttm_mem_limit) ||
223-
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
224-
vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
224+
if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
225+
kfd_mem_limit.max_ttm_mem_limit) {
225226
ret = -ENOMEM;
226227
goto release;
227228
}
228229

230+
/*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
231+
* carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
232+
* VRAM check since ttm_mem_limit check already cover this allocation
233+
*/
234+
235+
if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
236+
uint64_t vram_available =
237+
vram_size - reserved_for_pt - reserved_for_ras -
238+
atomic64_read(&adev->vram_pin_size);
239+
if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
240+
ret = -ENOMEM;
241+
goto release;
242+
}
243+
}
244+
229245
/* Update memory accounting by decreasing available system
230246
* memory, TTM memory and GPU memory as computed above
231247
*/
@@ -1626,11 +1642,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
16261642
uint64_t vram_available, system_mem_available, ttm_mem_available;
16271643

16281644
spin_lock(&kfd_mem_limit.mem_limit_lock);
1629-
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1630-
- adev->kfd.vram_used_aligned[xcp_id]
1631-
- atomic64_read(&adev->vram_pin_size)
1632-
- reserved_for_pt
1633-
- reserved_for_ras;
1645+
if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
1646+
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1647+
- adev->kfd.vram_used_aligned[xcp_id];
1648+
else
1649+
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1650+
- adev->kfd.vram_used_aligned[xcp_id]
1651+
- atomic64_read(&adev->vram_pin_size)
1652+
- reserved_for_pt
1653+
- reserved_for_ras;
16341654

16351655
if (adev->apu_prefer_gtt) {
16361656
system_mem_available = no_system_mem_limit ?

0 commit comments

Comments
 (0)