Skip to content

Commit 2d274bf

Browse files
AMD-ShaneXiao authored and alexdeucher committed
amd/amdkfd: Trigger segfault for early userptr unmapping
If applications unmap the memory before destroying the userptr, it needs to trigger a segfault to notify user space to correct the free sequence in VM debug mode.

v2: Send gpu access fault to user space
v3: Report gpu address to user space, remove unnecessary params
v4: update pr_err into one line, remove userptr log info

Signed-off-by: Shane Xiao <shane.xiao@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 8e320f6 commit 2d274bf

3 files changed

Lines changed: 33 additions & 0 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
25592559
if (ret != -EFAULT)
25602560
return ret;
25612561

2562+
/* If applications unmap memory before destroying the userptr
2563+
* from the KFD, trigger a segmentation fault in VM debug mode.
2564+
*/
2565+
if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
2566+
pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
2567+
pid_nr(process_info->pid), mem->va);
2568+
2569+
// Send GPU VM fault to user space
2570+
kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
2571+
mem->va);
2572+
}
2573+
25622574
ret = 0;
25632575
}
25642576

drivers/gpu/drm/amd/amdkfd/kfd_events.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid)
11771177
kfd_unref_process(p);
11781178
}
11791179

1180+
void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
1181+
{
1182+
struct kfd_process_device *pdd;
1183+
struct kfd_hsa_memory_exception_data exception_data;
1184+
int i;
1185+
1186+
memset(&exception_data, 0, sizeof(exception_data));
1187+
exception_data.va = gpu_va;
1188+
exception_data.failure.NotPresent = 1;
1189+
1190+
// Send VM seg fault to all kfd process device
1191+
for (i = 0; i < p->n_pdds; i++) {
1192+
pdd = p->pdds[i];
1193+
exception_data.gpu_id = pdd->user_gpu_id;
1194+
kfd_evict_process_device(pdd);
1195+
kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
1196+
}
1197+
}
1198+
11801199
void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
11811200
struct kfd_vm_fault_info *info,
11821201
struct kfd_hsa_memory_exception_data *data)

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
15071507
int kfd_get_num_events(struct kfd_process *p);
15081508
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
15091509

1510+
void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
1511+
15101512
void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
15111513
struct kfd_vm_fault_info *info,
15121514
struct kfd_hsa_memory_exception_data *data);

0 commit comments

Comments
 (0)