Skip to content

Commit ba24b5e

Browse files
Farah-kassabriogabbay
authored and committed
accel/habanalabs: split user interrupts pending list
Currently the driver maintains one list for both kinds of pending user interrupts: those that wait until the CQ reaches its target value, and those that seek to get timestamp records when the CQ reaches its target value. This causes a delay in handling the waiters, which get higher priority than the timestamp records. In order to solve this, let's split the list into two, one for each case, each protected by its own spinlock. Waiters will be handled within the interrupt context first, then the timestamp records will be set. Freeing the timestamp-related memory will be handled in a workqueue. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Tomer Tayar <ttayar@habana.ai> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
1 parent 1157b5d commit ba24b5e

4 files changed

Lines changed: 209 additions & 147 deletions

File tree

drivers/accel/habanalabs/common/command_submission.c

Lines changed: 138 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,19 +1098,22 @@ static void
10981098
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
10991099
{
11001100
struct hl_user_pending_interrupt *pend, *temp;
1101+
unsigned long flags;
11011102

1102-
spin_lock(&interrupt->wait_list_lock);
1103-
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
1104-
if (pend->ts_reg_info.buf) {
1105-
list_del(&pend->wait_list_node);
1106-
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1107-
hl_cb_put(pend->ts_reg_info.cq_cb);
1108-
} else {
1109-
pend->fence.error = -EIO;
1110-
complete_all(&pend->fence.completion);
1111-
}
1103+
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
1104+
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
1105+
pend->fence.error = -EIO;
1106+
complete_all(&pend->fence.completion);
11121107
}
1113-
spin_unlock(&interrupt->wait_list_lock);
1108+
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
1109+
1110+
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
1111+
list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
1112+
list_del(&pend->list_node);
1113+
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1114+
hl_cb_put(pend->ts_reg_info.cq_cb);
1115+
}
1116+
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
11141117
}
11151118

11161119
void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -3251,18 +3254,19 @@ static void unregister_timestamp_node(struct hl_device *hdev,
32513254
{
32523255
struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
32533256
bool ts_rec_found = false;
3257+
unsigned long flags;
32543258

32553259
if (need_lock)
3256-
spin_lock(&interrupt->wait_list_lock);
3260+
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
32573261

32583262
if (record->ts_reg_info.in_use) {
32593263
record->ts_reg_info.in_use = false;
3260-
list_del(&record->wait_list_node);
3264+
list_del(&record->list_node);
32613265
ts_rec_found = true;
32623266
}
32633267

32643268
if (need_lock)
3265-
spin_unlock(&interrupt->wait_list_lock);
3269+
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
32663270

32673271
/* Put refcounts that were taken when we registered the event */
32683272
if (ts_rec_found) {
@@ -3272,7 +3276,7 @@ static void unregister_timestamp_node(struct hl_device *hdev,
32723276
}
32733277

32743278
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
3275-
struct wait_interrupt_data *data,
3279+
struct wait_interrupt_data *data, unsigned long *flags,
32763280
struct hl_user_pending_interrupt **pend)
32773281
{
32783282
struct hl_user_pending_interrupt *req_offset_record;
@@ -3302,13 +3306,13 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
33023306
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
33033307

33043308
need_lock = true;
3305-
spin_unlock(&data->interrupt->wait_list_lock);
3309+
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
33063310
}
33073311

33083312
unregister_timestamp_node(hdev, req_offset_record, need_lock);
33093313

33103314
if (need_lock)
3311-
spin_lock(&data->interrupt->wait_list_lock);
3315+
spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
33123316
}
33133317

33143318
/* Fill up the new registration node info and add it to the list */
@@ -3325,18 +3329,14 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
33253329
return rc;
33263330
}
33273331

3328-
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3332+
static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
33293333
struct wait_interrupt_data *data,
3330-
bool register_ts_record,
33313334
u32 *status, u64 *timestamp)
33323335
{
33333336
struct hl_user_pending_interrupt *pend;
3334-
unsigned long timeout;
3335-
long completion_rc;
3337+
unsigned long flags;
33363338
int rc = 0;
33373339

3338-
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3339-
33403340
hl_ctx_get(ctx);
33413341

33423342
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
@@ -3352,61 +3352,109 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
33523352
goto put_cq_cb;
33533353
}
33543354

3355-
if (register_ts_record) {
3356-
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
3357-
data->interrupt->interrupt_id, data->ts_handle,
3358-
data->ts_offset, data->cq_offset);
3355+
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
3356+
data->interrupt->interrupt_id, data->ts_handle,
3357+
data->ts_offset, data->cq_offset);
33593358

3360-
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3361-
if (!data->buf) {
3362-
rc = -EINVAL;
3363-
goto put_cq_cb;
3364-
}
3359+
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3360+
if (!data->buf) {
3361+
rc = -EINVAL;
3362+
goto put_cq_cb;
3363+
}
33653364

3366-
spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
3365+
spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
33673366

3368-
/* get ts buffer record */
3369-
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &pend);
3370-
if (rc) {
3371-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3372-
goto put_ts_buff;
3373-
}
3374-
} else {
3375-
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3376-
if (!pend) {
3377-
rc = -ENOMEM;
3378-
goto put_cq_cb;
3379-
}
3380-
hl_fence_init(&pend->fence, ULONG_MAX);
3381-
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3382-
pend->cq_target_value = data->target_value;
3383-
spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
3367+
/* get ts buffer record */
3368+
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3369+
if (rc) {
3370+
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3371+
goto put_ts_buff;
33843372
}
33853373

33863374
/* We check for completion value as interrupt could have been received
3387-
* before we add the wait/timestamp node to the wait list.
3375+
* before we add the timestamp node to the ts list.
33883376
*/
33893377
if (*pend->cq_kernel_addr >= data->target_value) {
3390-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3378+
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
33913379

3392-
if (register_ts_record) {
3393-
dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
3394-
pend, data->ts_offset, data->interrupt->interrupt_id);
3395-
pend->ts_reg_info.in_use = false;
3396-
}
3380+
dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
3381+
pend, data->ts_offset, data->interrupt->interrupt_id);
33973382

3383+
pend->ts_reg_info.in_use = 0;
33983384
*status = HL_WAIT_CS_STATUS_COMPLETED;
3385+
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3386+
3387+
goto put_ts_buff;
3388+
}
3389+
3390+
list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
3391+
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3392+
3393+
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
3394+
3395+
hl_ctx_put(ctx);
3396+
3397+
return rc;
3398+
3399+
put_ts_buff:
3400+
hl_mmap_mem_buf_put(data->buf);
3401+
put_cq_cb:
3402+
hl_cb_put(data->cq_cb);
3403+
put_ctx:
3404+
hl_ctx_put(ctx);
3405+
3406+
return rc;
3407+
}
3408+
3409+
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3410+
struct wait_interrupt_data *data,
3411+
u32 *status, u64 *timestamp)
3412+
{
3413+
struct hl_user_pending_interrupt *pend;
3414+
unsigned long timeout, flags;
3415+
long completion_rc;
3416+
int rc = 0;
3417+
3418+
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3419+
3420+
hl_ctx_get(ctx);
3421+
3422+
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3423+
if (!data->cq_cb) {
3424+
rc = -EINVAL;
3425+
goto put_ctx;
3426+
}
3427+
3428+
/* Validate the cq offset */
3429+
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3430+
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3431+
rc = -EINVAL;
3432+
goto put_cq_cb;
3433+
}
3434+
3435+
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3436+
if (!pend) {
3437+
rc = -ENOMEM;
3438+
goto put_cq_cb;
3439+
}
3440+
3441+
hl_fence_init(&pend->fence, ULONG_MAX);
3442+
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3443+
pend->cq_target_value = data->target_value;
3444+
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3445+
3446+
3447+
/* We check for completion value as interrupt could have been received
3448+
* before we add the wait node to the wait list.
3449+
*/
3450+
if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
3451+
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3452+
3453+
if (*pend->cq_kernel_addr >= data->target_value)
3454+
*status = HL_WAIT_CS_STATUS_COMPLETED;
3455+
else
3456+
*status = HL_WAIT_CS_STATUS_BUSY;
33993457

3400-
if (register_ts_record) {
3401-
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3402-
goto put_ts_buff;
3403-
} else {
3404-
pend->fence.timestamp = ktime_get();
3405-
goto set_timestamp;
3406-
}
3407-
} else if (!data->intr_timeout_us) {
3408-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3409-
*status = HL_WAIT_CS_STATUS_BUSY;
34103458
pend->fence.timestamp = ktime_get();
34113459
goto set_timestamp;
34123460
}
@@ -3417,13 +3465,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
34173465
* in order to shorten the list pass loop, since
34183466
* same list could have nodes for different cq counter handle.
34193467
*/
3420-
list_add_tail(&pend->wait_list_node, &data->interrupt->wait_list_head);
3421-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3422-
3423-
if (register_ts_record) {
3424-
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
3425-
goto ts_registration_exit;
3426-
}
3468+
list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
3469+
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
34273470

34283471
/* Wait for interrupt handler to signal completion */
34293472
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
@@ -3462,21 +3505,18 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
34623505
* for ts record, the node will be deleted in the irq handler after
34633506
* we reach the target value.
34643507
*/
3465-
spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
3466-
list_del(&pend->wait_list_node);
3467-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3508+
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3509+
list_del(&pend->list_node);
3510+
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
34683511

34693512
set_timestamp:
34703513
*timestamp = ktime_to_ns(pend->fence.timestamp);
34713514
kfree(pend);
34723515
hl_cb_put(data->cq_cb);
3473-
ts_registration_exit:
34743516
hl_ctx_put(ctx);
34753517

34763518
return rc;
34773519

3478-
put_ts_buff:
3479-
hl_mmap_mem_buf_put(data->buf);
34803520
put_cq_cb:
34813521
hl_cb_put(data->cq_cb);
34823522
put_ctx:
@@ -3513,7 +3553,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
35133553
* handler to monitor
35143554
*/
35153555
spin_lock(&interrupt->wait_list_lock);
3516-
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
3556+
list_add_tail(&pend->list_node, &interrupt->wait_list_head);
35173557
spin_unlock(&interrupt->wait_list_lock);
35183558

35193559
/* We check for completion value as interrupt could have been received
@@ -3590,7 +3630,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
35903630

35913631
remove_pending_user_interrupt:
35923632
spin_lock(&interrupt->wait_list_lock);
3593-
list_del(&pend->wait_list_node);
3633+
list_del(&pend->list_node);
35943634
spin_unlock(&interrupt->wait_list_lock);
35953635

35963636
*timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3649,16 +3689,6 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
36493689
return -EINVAL;
36503690
}
36513691

3652-
/*
3653-
* Allow only one registration at a time. this is needed in order to prevent issues
3654-
* while handling the flow of re-use of the same offset.
3655-
* Since the registration flow is protected only by the interrupt lock, re-use flow
3656-
* might request to move ts node to another interrupt list, and in such case we're
3657-
* not protected.
3658-
*/
3659-
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
3660-
mutex_lock(&hpriv->ctx->ts_reg_lock);
3661-
36623692
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
36633693
struct wait_interrupt_data wait_intr_data = {0};
36643694

@@ -3671,19 +3701,30 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
36713701
wait_intr_data.target_value = args->in.target;
36723702
wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
36733703

3674-
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3675-
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
3676-
&status, &timestamp);
3704+
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
3705+
/*
3706+
* Allow only one registration at a time. this is needed in order to prevent
3707+
* issues while handling the flow of re-use of the same offset.
3708+
* Since the registration flow is protected only by the interrupt lock,
3709+
* re-use flow might request to move ts node to another interrupt list,
3710+
* and in such case we're not protected.
3711+
*/
3712+
mutex_lock(&hpriv->ctx->ts_reg_lock);
3713+
3714+
rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3715+
&status, &timestamp);
3716+
3717+
mutex_unlock(&hpriv->ctx->ts_reg_lock);
3718+
} else
3719+
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3720+
&status, &timestamp);
36773721
} else {
36783722
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
36793723
args->in.interrupt_timeout_us, args->in.addr,
36803724
args->in.target, interrupt, &status,
36813725
&timestamp);
36823726
}
36833727

3684-
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
3685-
mutex_unlock(&hpriv->ctx->ts_reg_lock);
3686-
36873728
if (rc)
36883729
return rc;
36893730

0 commit comments

Comments
 (0)