Skip to content

Commit 1157b5d

Browse files
Farah-kassabri authored and ogabbay committed
accel/habanalabs: optimize timestamp registration handler
Currently we use dynamic allocation inside the irq handler in order to allocate a free node to be used for the free jobs. This operation is expensive, especially when we deal with a large burst of event records that get released at the same time. The alternative is to have a pre-allocated pool of free nodes and just fetch nodes from this pool at irq handling time instead of allocating them. In case the pool becomes full, the driver will fall back to dynamic allocations. As part of the optimization, also update the unregister flow upon re-using a timestamp record, by making the operation much simpler and quicker. We already have the record in the registration flow and now we just seek to re-use it with a different interrupt. Therefore, there is no need to look for the buffer according to the user handle. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Tomer Tayar <ttayar@habana.ai> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
1 parent 0165994 commit 1157b5d

4 files changed

Lines changed: 179 additions & 67 deletions

File tree

drivers/accel/habanalabs/common/command_submission.c

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3246,60 +3246,29 @@ static int validate_and_get_ts_record(struct device *dev,
32463246
return 0;
32473247
}
32483248

3249-
static int unregister_timestamp_node(struct hl_device *hdev, struct hl_ctx *ctx,
3250-
struct hl_mem_mgr *mmg, u64 ts_handle, u64 ts_offset,
3251-
struct hl_user_interrupt *interrupt)
3249+
static void unregister_timestamp_node(struct hl_device *hdev,
3250+
struct hl_user_pending_interrupt *record, bool need_lock)
32523251
{
3253-
struct hl_user_pending_interrupt *req_event_record, *pend, *temp_pend;
3254-
struct hl_mmap_mem_buf *buff;
3255-
struct hl_ts_buff *ts_buff;
3252+
struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
32563253
bool ts_rec_found = false;
3257-
int rc;
3258-
3259-
buff = hl_mmap_mem_buf_get(mmg, ts_handle);
3260-
if (!buff) {
3261-
dev_err(hdev->dev, "invalid TS buff handle!\n");
3262-
return -EINVAL;
3263-
}
3264-
3265-
ts_buff = buff->private;
3266-
3267-
rc = validate_and_get_ts_record(hdev->dev, ts_buff, ts_offset, &req_event_record);
3268-
if (rc)
3269-
goto put_buf;
32703254

3271-
/*
3272-
* Note: we don't use the ts in_use field here, but we rather scan the list
3273-
* because we cannot rely on the user to keep the order of register/unregister calls
3274-
* and since we might have races here all the time between the irq and register/unregister
3275-
* calls so it safer to lock the list and scan it to find the node.
3276-
* If the node found on the list we mark it as not in use and delete it from the list,
3277-
* if it's not here then the node was handled already in the irq before we get into
3278-
* this ioctl.
3279-
*/
3280-
spin_lock(&interrupt->wait_list_lock);
3255+
if (need_lock)
3256+
spin_lock(&interrupt->wait_list_lock);
32813257

3282-
list_for_each_entry_safe(pend, temp_pend, &interrupt->wait_list_head, wait_list_node) {
3283-
if (pend == req_event_record) {
3284-
pend->ts_reg_info.in_use = false;
3285-
list_del(&pend->wait_list_node);
3286-
ts_rec_found = true;
3287-
break;
3288-
}
3258+
if (record->ts_reg_info.in_use) {
3259+
record->ts_reg_info.in_use = false;
3260+
list_del(&record->wait_list_node);
3261+
ts_rec_found = true;
32893262
}
32903263

3291-
spin_unlock(&interrupt->wait_list_lock);
3264+
if (need_lock)
3265+
spin_unlock(&interrupt->wait_list_lock);
32923266

32933267
/* Put refcounts that were taken when we registered the event */
32943268
if (ts_rec_found) {
3295-
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
3296-
hl_cb_put(pend->ts_reg_info.cq_cb);
3269+
hl_mmap_mem_buf_put(record->ts_reg_info.buf);
3270+
hl_cb_put(record->ts_reg_info.cq_cb);
32973271
}
3298-
3299-
put_buf:
3300-
hl_mmap_mem_buf_put(buff);
3301-
3302-
return rc;
33033272
}
33043273

33053274
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
@@ -3308,28 +3277,38 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
33083277
{
33093278
struct hl_user_pending_interrupt *req_offset_record;
33103279
struct hl_ts_buff *ts_buff = data->buf->private;
3280+
bool need_lock = false;
33113281
int rc;
33123282

33133283
rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
33143284
&req_offset_record);
33153285
if (rc)
33163286
return rc;
33173287

3318-
/* In case the node already registered, need to unregister first then re-use*/
3288+
/* In case the node already registered, need to unregister first then re-use */
33193289
if (req_offset_record->ts_reg_info.in_use) {
33203290
dev_dbg(data->buf->mmg->dev,
3321-
"Requested ts offset(%llx) is in use, unregister first\n",
3322-
data->ts_offset);
3291+
"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
3292+
req_offset_record,
3293+
req_offset_record->ts_reg_info.interrupt->interrupt_id,
3294+
req_offset_record->ts_reg_info.timestamp_kernel_addr,
3295+
data->interrupt->interrupt_id);
33233296
/*
33243297
* Since interrupt here can be different than the one the node currently registered
3325-
* on, and we don't wan't to lock two lists while we're doing unregister, so
3298+
* on, and we don't want to lock two lists while we're doing unregister, so
33263299
* unlock the new interrupt wait list here and acquire the lock again after you done
33273300
*/
3328-
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
3301+
if (data->interrupt->interrupt_id !=
3302+
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
33293303

3330-
unregister_timestamp_node(hdev, ctx, data->mmg, data->ts_handle,
3331-
data->ts_offset, req_offset_record->ts_reg_info.interrupt);
3332-
spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
3304+
need_lock = true;
3305+
spin_unlock(&data->interrupt->wait_list_lock);
3306+
}
3307+
3308+
unregister_timestamp_node(hdev, req_offset_record, need_lock);
3309+
3310+
if (need_lock)
3311+
spin_lock(&data->interrupt->wait_list_lock);
33333312
}
33343313

33353314
/* Fill up the new registration node info and add it to the list */

drivers/accel/habanalabs/common/device.c

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,7 +2033,9 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
20332033
int hl_device_init(struct hl_device *hdev)
20342034
{
20352035
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
2036+
struct hl_ts_free_jobs *free_jobs_data;
20362037
bool expose_interfaces_on_err = false;
2038+
void *p;
20372039

20382040
/* Initialize ASIC function pointers and perform early init */
20392041
rc = device_early_init(hdev);
@@ -2050,15 +2052,43 @@ int hl_device_init(struct hl_device *hdev)
20502052
rc = -ENOMEM;
20512053
goto early_fini;
20522054
}
2055+
2056+
/* Timestamp records supported only if CQ supported in device */
2057+
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
2058+
for (i = 0 ; i < user_interrupt_cnt ; i++) {
2059+
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
2060+
sizeof(struct timestamp_reg_free_node));
2061+
if (!p) {
2062+
rc = -ENOMEM;
2063+
goto free_usr_intr_mem;
2064+
}
2065+
free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
2066+
free_jobs_data->free_nodes_pool = p;
2067+
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
2068+
free_jobs_data->next_avail_free_node_idx = 0;
2069+
}
2070+
}
2071+
}
2072+
2073+
free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
2074+
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
2075+
sizeof(struct timestamp_reg_free_node));
2076+
if (!p) {
2077+
rc = -ENOMEM;
2078+
goto free_usr_intr_mem;
20532079
}
20542080

2081+
free_jobs_data->free_nodes_pool = p;
2082+
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
2083+
free_jobs_data->next_avail_free_node_idx = 0;
2084+
20552085
/*
20562086
* Start calling ASIC initialization. First S/W then H/W and finally
20572087
* late init
20582088
*/
20592089
rc = hdev->asic_funcs->sw_init(hdev);
20602090
if (rc)
2061-
goto free_usr_intr_mem;
2091+
goto free_common_usr_intr_mem;
20622092

20632093

20642094
/* initialize completion structure for multi CS wait */
@@ -2297,8 +2327,17 @@ int hl_device_init(struct hl_device *hdev)
22972327
hl_hw_queues_destroy(hdev);
22982328
sw_fini:
22992329
hdev->asic_funcs->sw_fini(hdev);
2330+
free_common_usr_intr_mem:
2331+
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
23002332
free_usr_intr_mem:
2301-
kfree(hdev->user_interrupt);
2333+
if (user_interrupt_cnt) {
2334+
for (i = 0 ; i < user_interrupt_cnt ; i++) {
2335+
if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
2336+
break;
2337+
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
2338+
}
2339+
kfree(hdev->user_interrupt);
2340+
}
23022341
early_fini:
23032342
device_early_fini(hdev);
23042343
out_disabled:
@@ -2323,6 +2362,7 @@ int hl_device_init(struct hl_device *hdev)
23232362
*/
23242363
void hl_device_fini(struct hl_device *hdev)
23252364
{
2365+
u32 user_interrupt_cnt;
23262366
bool device_in_reset;
23272367
ktime_t timeout;
23282368
u64 reset_sec;
@@ -2443,7 +2483,20 @@ void hl_device_fini(struct hl_device *hdev)
24432483
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
24442484
hl_cq_fini(hdev, &hdev->completion_queue[i]);
24452485
kfree(hdev->completion_queue);
2446-
kfree(hdev->user_interrupt);
2486+
2487+
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
2488+
hdev->asic_prop.user_interrupt_count;
2489+
2490+
if (user_interrupt_cnt) {
2491+
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
2492+
for (i = 0 ; i < user_interrupt_cnt ; i++)
2493+
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
2494+
}
2495+
2496+
kfree(hdev->user_interrupt);
2497+
}
2498+
2499+
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
24472500

24482501
hl_hw_queues_destroy(hdev);
24492502

drivers/accel/habanalabs/common/habanalabs.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ struct hl_fpriv;
106106
/* MMU */
107107
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
108108

109+
#define TIMESTAMP_FREE_NODES_NUM 512
110+
109111
/**
110112
* enum hl_mmu_page_table_location - mmu page table location
111113
* @MMU_DR_PGT: page-table is located on device DRAM.
@@ -1104,9 +1106,26 @@ enum hl_user_interrupt_type {
11041106
HL_USR_INTERRUPT_UNEXPECTED
11051107
};
11061108

1109+
/**
1110+
* struct hl_ts_free_jobs - holds user interrupt ts free nodes related data
1111+
* @free_nodes_pool: pool of nodes to be used for free timestamp jobs
1112+
* @free_nodes_length: number of nodes in free_nodes_pool
1113+
* @next_avail_free_node_idx: index of the next free node in the pool
1114+
*
1115+
* the free nodes pool must be protected by the user interrupt lock
1116+
* to avoid race between different interrupts which are using the same
1117+
* ts buffer with different offsets.
1118+
*/
1119+
struct hl_ts_free_jobs {
1120+
struct timestamp_reg_free_node *free_nodes_pool;
1121+
u32 free_nodes_length;
1122+
u32 next_avail_free_node_idx;
1123+
};
1124+
11071125
/**
11081126
* struct hl_user_interrupt - holds user interrupt information
11091127
* @hdev: pointer to the device structure
1128+
* @ts_free_jobs_data: timestamp free jobs related data
11101129
* @type: user interrupt type
11111130
* @wait_list_head: head to the list of user threads pending on this interrupt
11121131
* @wait_list_lock: protects wait_list_head
@@ -1115,6 +1134,7 @@ enum hl_user_interrupt_type {
11151134
*/
11161135
struct hl_user_interrupt {
11171136
struct hl_device *hdev;
1137+
struct hl_ts_free_jobs ts_free_jobs_data;
11181138
enum hl_user_interrupt_type type;
11191139
struct list_head wait_list_head;
11201140
spinlock_t wait_list_lock;
@@ -1127,11 +1147,15 @@ struct hl_user_interrupt {
11271147
* @free_objects_node: node in the list free_obj_jobs
11281148
* @cq_cb: pointer to cq command buffer to be freed
11291149
* @buf: pointer to timestamp buffer to be freed
1150+
* @in_use: indicates whether the node still in use in workqueue thread.
1151+
* @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
11301152
*/
11311153
struct timestamp_reg_free_node {
11321154
struct list_head free_objects_node;
11331155
struct hl_cb *cq_cb;
11341156
struct hl_mmap_mem_buf *buf;
1157+
atomic_t in_use;
1158+
u8 dynamic_alloc;
11351159
};
11361160

11371161
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@@ -1140,11 +1164,14 @@ struct timestamp_reg_free_node {
11401164
* @free_obj: workqueue object to free timestamp registration node objects
11411165
* @hdev: pointer to the device structure
11421166
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
1167+
* @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
1168+
* interrupt handler.
11431169
*/
11441170
struct timestamp_reg_work_obj {
11451171
struct work_struct free_obj;
11461172
struct hl_device *hdev;
11471173
struct list_head *free_obj_head;
1174+
struct list_head *dynamic_alloc_free_obj_head;
11481175
};
11491176

11501177
/* struct timestamp_reg_info - holds the timestamp registration related data.

0 commit comments

Comments
 (0)