Skip to content

Commit adda4e8

Browse files
committed
drm/xe: Enforce correct user fence signaling order using
Prevent application hangs caused by out-of-order fence signaling when user fences are attached. Use drm_syncobj (via dma-fence-chain) to guarantee that each user fence signals in order, regardless of the signaling order of the attached fences. Ensure user fence writebacks to user space occur in the correct sequence. v7: - Skip drm_syncbj create of error (CI) Fixes: dd08ebf ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Link: https://patch.msgid.link/20251031234050.3043507-2-matthew.brost@intel.com
1 parent a4ff26b commit adda4e8

9 files changed

Lines changed: 86 additions & 18 deletions

File tree

drivers/gpu/drm/xe/xe_exec.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
173173

174174
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
175175
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
176-
&syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC |
176+
&syncs_user[num_syncs], NULL, 0,
177+
SYNC_PARSE_FLAG_EXEC |
177178
(xe_vm_in_lr_mode(vm) ?
178179
SYNC_PARSE_FLAG_LR_MODE : 0));
179180
if (err)

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <drm/drm_device.h>
1111
#include <drm/drm_drv.h>
1212
#include <drm/drm_file.h>
13+
#include <drm/drm_syncobj.h>
1314
#include <uapi/drm/xe_drm.h>
1415

1516
#include "xe_dep_scheduler.h"
@@ -368,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
368369
}
369370
xe_vm_put(migrate_vm);
370371

372+
if (!IS_ERR(q)) {
373+
int err = drm_syncobj_create(&q->ufence_syncobj,
374+
DRM_SYNCOBJ_CREATE_SIGNALED,
375+
NULL);
376+
if (err) {
377+
xe_exec_queue_put(q);
378+
return ERR_PTR(err);
379+
}
380+
}
381+
371382
return q;
372383
}
373384
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
@@ -379,6 +390,9 @@ void xe_exec_queue_destroy(struct kref *ref)
379390

380391
xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
381392

393+
if (q->ufence_syncobj)
394+
drm_syncobj_put(q->ufence_syncobj);
395+
382396
if (xe_exec_queue_uses_pxp(q))
383397
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
384398

drivers/gpu/drm/xe/xe_exec_queue_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "xe_hw_fence_types.h"
1616
#include "xe_lrc_types.h"
1717

18+
struct drm_syncobj;
1819
struct xe_execlist_exec_queue;
1920
struct xe_gt;
2021
struct xe_guc_exec_queue;
@@ -155,6 +156,12 @@ struct xe_exec_queue {
155156
struct list_head link;
156157
} pxp;
157158

159+
/** @ufence_syncobj: User fence syncobj */
160+
struct drm_syncobj *ufence_syncobj;
161+
162+
/** @ufence_timeline_value: User fence timeline value */
163+
u64 ufence_timeline_value;
164+
158165
/** @ops: submission backend exec queue operations */
159166
const struct xe_exec_queue_ops *ops;
160167

drivers/gpu/drm/xe/xe_oa.c

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <drm/drm_drv.h>
1212
#include <drm/drm_managed.h>
13+
#include <drm/drm_syncobj.h>
1314
#include <uapi/drm/xe_drm.h>
1415

1516
#include <generated/xe_wa_oob.h>
@@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro
13901391
return 0;
13911392
}
13921393

1393-
static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
1394+
static int xe_oa_parse_syncs(struct xe_oa *oa,
1395+
struct xe_oa_stream *stream,
1396+
struct xe_oa_open_param *param)
13941397
{
13951398
int ret, num_syncs, num_ufence = 0;
13961399

@@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
14101413

14111414
for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
14121415
ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
1413-
&param->syncs_user[num_syncs], 0);
1416+
&param->syncs_user[num_syncs],
1417+
stream->ufence_syncobj,
1418+
++stream->ufence_timeline_value, 0);
14141419
if (ret)
14151420
goto err_syncs;
14161421

@@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
15401545
return -ENODEV;
15411546

15421547
param.xef = stream->xef;
1543-
err = xe_oa_parse_syncs(stream->oa, &param);
1548+
err = xe_oa_parse_syncs(stream->oa, stream, &param);
15441549
if (err)
15451550
goto err_config_put;
15461551

@@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
16361641
if (stream->exec_q)
16371642
xe_exec_queue_put(stream->exec_q);
16381643

1644+
drm_syncobj_put(stream->ufence_syncobj);
16391645
kfree(stream);
16401646
}
16411647

@@ -1827,6 +1833,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
18271833
struct xe_oa_open_param *param)
18281834
{
18291835
struct xe_oa_stream *stream;
1836+
struct drm_syncobj *ufence_syncobj;
18301837
int stream_fd;
18311838
int ret;
18321839

@@ -1837,17 +1844,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
18371844
goto exit;
18381845
}
18391846

1847+
ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED,
1848+
NULL);
1849+
if (ret)
1850+
goto exit;
1851+
18401852
stream = kzalloc(sizeof(*stream), GFP_KERNEL);
18411853
if (!stream) {
18421854
ret = -ENOMEM;
1843-
goto exit;
1855+
goto err_syncobj;
18441856
}
1845-
1857+
stream->ufence_syncobj = ufence_syncobj;
18461858
stream->oa = oa;
1847-
ret = xe_oa_stream_init(stream, param);
1859+
1860+
ret = xe_oa_parse_syncs(oa, stream, param);
18481861
if (ret)
18491862
goto err_free;
18501863

1864+
ret = xe_oa_stream_init(stream, param);
1865+
if (ret) {
1866+
while (param->num_syncs--)
1867+
xe_sync_entry_cleanup(&param->syncs[param->num_syncs]);
1868+
kfree(param->syncs);
1869+
goto err_free;
1870+
}
1871+
18511872
if (!param->disabled) {
18521873
ret = xe_oa_enable_locked(stream);
18531874
if (ret)
@@ -1871,6 +1892,8 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
18711892
xe_oa_stream_destroy(stream);
18721893
err_free:
18731894
kfree(stream);
1895+
err_syncobj:
1896+
drm_syncobj_put(ufence_syncobj);
18741897
exit:
18751898
return ret;
18761899
}
@@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
20842107
goto err_exec_q;
20852108
}
20862109

2087-
ret = xe_oa_parse_syncs(oa, &param);
2088-
if (ret)
2089-
goto err_exec_q;
2090-
20912110
mutex_lock(&param.hwe->gt->oa.gt_lock);
20922111
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
20932112
mutex_unlock(&param.hwe->gt->oa.gt_lock);
20942113
if (ret < 0)
2095-
goto err_sync_cleanup;
2114+
goto err_exec_q;
20962115

20972116
return ret;
20982117

2099-
err_sync_cleanup:
2100-
while (param.num_syncs--)
2101-
xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
2102-
kfree(param.syncs);
21032118
err_exec_q:
21042119
if (param.exec_q)
21052120
xe_exec_queue_put(param.exec_q);

drivers/gpu/drm/xe/xe_oa_types.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "regs/xe_reg_defs.h"
1616
#include "xe_hw_engine_types.h"
1717

18+
struct drm_syncobj;
19+
1820
#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
1921

2022
enum xe_oa_report_header {
@@ -248,6 +250,12 @@ struct xe_oa_stream {
248250
/** @xef: xe_file with which the stream was opened */
249251
struct xe_file *xef;
250252

253+
/** @ufence_syncobj: User fence syncobj */
254+
struct drm_syncobj *ufence_syncobj;
255+
256+
/** @ufence_timeline_value: User fence timeline value */
257+
u64 ufence_timeline_value;
258+
251259
/** @last_fence: fence to use in stream destroy when needed */
252260
struct dma_fence *last_fence;
253261

drivers/gpu/drm/xe/xe_sync.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
113113
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
114114
struct xe_sync_entry *sync,
115115
struct drm_xe_sync __user *sync_user,
116+
struct drm_syncobj *ufence_syncobj,
117+
u64 ufence_timeline_value,
116118
unsigned int flags)
117119
{
118120
struct drm_xe_sync sync_in;
@@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
192194
if (exec) {
193195
sync->addr = sync_in.addr;
194196
} else {
197+
sync->ufence_timeline_value = ufence_timeline_value;
195198
sync->ufence = user_fence_create(xe, sync_in.addr,
196199
sync_in.timeline_value);
197200
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
198201
return PTR_ERR(sync->ufence);
202+
sync->ufence_chain_fence = dma_fence_chain_alloc();
203+
if (!sync->ufence_chain_fence)
204+
return -ENOMEM;
205+
sync->ufence_syncobj = ufence_syncobj;
199206
}
200207

201208
break;
@@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
239246
} else if (sync->ufence) {
240247
int err;
241248

242-
dma_fence_get(fence);
249+
drm_syncobj_add_point(sync->ufence_syncobj,
250+
sync->ufence_chain_fence,
251+
fence, sync->ufence_timeline_value);
252+
sync->ufence_chain_fence = NULL;
253+
254+
fence = drm_syncobj_fence_get(sync->ufence_syncobj);
243255
user_fence_get(sync->ufence);
244256
err = dma_fence_add_callback(fence, &sync->ufence->cb,
245257
user_fence_cb);
@@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
259271
drm_syncobj_put(sync->syncobj);
260272
dma_fence_put(sync->fence);
261273
dma_fence_chain_free(sync->chain_fence);
262-
if (sync->ufence)
274+
dma_fence_chain_free(sync->ufence_chain_fence);
275+
if (!IS_ERR_OR_NULL(sync->ufence))
263276
user_fence_put(sync->ufence);
264277
}
265278

drivers/gpu/drm/xe/xe_sync.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "xe_sync_types.h"
1010

11+
struct drm_syncobj;
1112
struct xe_device;
1213
struct xe_exec_queue;
1314
struct xe_file;
@@ -21,6 +22,8 @@ struct xe_vm;
2122
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
2223
struct xe_sync_entry *sync,
2324
struct drm_xe_sync __user *sync_user,
25+
struct drm_syncobj *ufence_syncobj,
26+
u64 ufence_timeline_value,
2427
unsigned int flags);
2528
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
2629
struct xe_sched_job *job);

drivers/gpu/drm/xe/xe_sync_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@ struct xe_sync_entry {
1818
struct drm_syncobj *syncobj;
1919
struct dma_fence *fence;
2020
struct dma_fence_chain *chain_fence;
21+
struct dma_fence_chain *ufence_chain_fence;
22+
struct drm_syncobj *ufence_syncobj;
2123
struct xe_user_fence *ufence;
2224
u64 addr;
2325
u64 timeline_value;
26+
u64 ufence_timeline_value;
2427
u32 type;
2528
u32 flags;
2629
};

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3633,8 +3633,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
36333633

36343634
syncs_user = u64_to_user_ptr(args->syncs);
36353635
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3636+
struct xe_exec_queue *__q = q ?: vm->q[0];
3637+
36363638
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
36373639
&syncs_user[num_syncs],
3640+
__q->ufence_syncobj,
3641+
++__q->ufence_timeline_value,
36383642
(xe_vm_in_lr_mode(vm) ?
36393643
SYNC_PARSE_FLAG_LR_MODE : 0) |
36403644
(!args->num_binds ?

0 commit comments

Comments
 (0)