@@ -1098,19 +1098,22 @@ static void
10981098wake_pending_user_interrupt_threads (struct hl_user_interrupt * interrupt )
10991099{
11001100 struct hl_user_pending_interrupt * pend , * temp ;
1101+ unsigned long flags ;
11011102
1102- spin_lock (& interrupt -> wait_list_lock );
1103- list_for_each_entry_safe (pend , temp , & interrupt -> wait_list_head , wait_list_node ) {
1104- if (pend -> ts_reg_info .buf ) {
1105- list_del (& pend -> wait_list_node );
1106- hl_mmap_mem_buf_put (pend -> ts_reg_info .buf );
1107- hl_cb_put (pend -> ts_reg_info .cq_cb );
1108- } else {
1109- pend -> fence .error = - EIO ;
1110- complete_all (& pend -> fence .completion );
1111- }
1103+ spin_lock_irqsave (& interrupt -> wait_list_lock , flags );
1104+ list_for_each_entry_safe (pend , temp , & interrupt -> wait_list_head , list_node ) {
1105+ pend -> fence .error = - EIO ;
1106+ complete_all (& pend -> fence .completion );
11121107 }
1113- spin_unlock (& interrupt -> wait_list_lock );
1108+ spin_unlock_irqrestore (& interrupt -> wait_list_lock , flags );
1109+
1110+ spin_lock_irqsave (& interrupt -> ts_list_lock , flags );
1111+ list_for_each_entry_safe (pend , temp , & interrupt -> ts_list_head , list_node ) {
1112+ list_del (& pend -> list_node );
1113+ hl_mmap_mem_buf_put (pend -> ts_reg_info .buf );
1114+ hl_cb_put (pend -> ts_reg_info .cq_cb );
1115+ }
1116+ spin_unlock_irqrestore (& interrupt -> ts_list_lock , flags );
11141117}
11151118
11161119void hl_release_pending_user_interrupts (struct hl_device * hdev )
@@ -3251,18 +3254,19 @@ static void unregister_timestamp_node(struct hl_device *hdev,
32513254{
32523255 struct hl_user_interrupt * interrupt = record -> ts_reg_info .interrupt ;
32533256 bool ts_rec_found = false;
3257+ unsigned long flags ;
32543258
32553259 if (need_lock )
3256- spin_lock (& interrupt -> wait_list_lock );
3260+ spin_lock_irqsave (& interrupt -> ts_list_lock , flags );
32573261
32583262 if (record -> ts_reg_info .in_use ) {
32593263 record -> ts_reg_info .in_use = false;
3260- list_del (& record -> wait_list_node );
3264+ list_del (& record -> list_node );
32613265 ts_rec_found = true;
32623266 }
32633267
32643268 if (need_lock )
3265- spin_unlock (& interrupt -> wait_list_lock );
3269+ spin_unlock_irqrestore (& interrupt -> ts_list_lock , flags );
32663270
32673271 /* Put refcounts that were taken when we registered the event */
32683272 if (ts_rec_found ) {
@@ -3272,7 +3276,7 @@ static void unregister_timestamp_node(struct hl_device *hdev,
32723276}
32733277
32743278static int ts_get_and_handle_kernel_record (struct hl_device * hdev , struct hl_ctx * ctx ,
3275- struct wait_interrupt_data * data ,
3279+ struct wait_interrupt_data * data , unsigned long * flags ,
32763280 struct hl_user_pending_interrupt * * pend )
32773281{
32783282 struct hl_user_pending_interrupt * req_offset_record ;
@@ -3302,13 +3306,13 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
33023306 req_offset_record -> ts_reg_info .interrupt -> interrupt_id ) {
33033307
33043308 need_lock = true;
3305- spin_unlock (& data -> interrupt -> wait_list_lock );
3309+ spin_unlock_irqrestore (& data -> interrupt -> ts_list_lock , * flags );
33063310 }
33073311
33083312 unregister_timestamp_node (hdev , req_offset_record , need_lock );
33093313
33103314 if (need_lock )
3311- spin_lock (& data -> interrupt -> wait_list_lock );
3315+ spin_lock_irqsave (& data -> interrupt -> ts_list_lock , * flags );
33123316 }
33133317
33143318 /* Fill up the new registration node info and add it to the list */
@@ -3325,18 +3329,14 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
33253329 return rc ;
33263330}
33273331
3328- static int _hl_interrupt_wait_ioctl (struct hl_device * hdev , struct hl_ctx * ctx ,
3332+ static int _hl_interrupt_ts_reg_ioctl (struct hl_device * hdev , struct hl_ctx * ctx ,
33293333 struct wait_interrupt_data * data ,
3330- bool register_ts_record ,
33313334 u32 * status , u64 * timestamp )
33323335{
33333336 struct hl_user_pending_interrupt * pend ;
3334- unsigned long timeout ;
3335- long completion_rc ;
3337+ unsigned long flags ;
33363338 int rc = 0 ;
33373339
3338- timeout = hl_usecs64_to_jiffies (data -> intr_timeout_us );
3339-
33403340 hl_ctx_get (ctx );
33413341
33423342 data -> cq_cb = hl_cb_get (data -> mmg , data -> cq_handle );
@@ -3352,61 +3352,109 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
33523352 goto put_cq_cb ;
33533353 }
33543354
3355- if (register_ts_record ) {
3356- dev_dbg (hdev -> dev , "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n" ,
3357- data -> interrupt -> interrupt_id , data -> ts_handle ,
3358- data -> ts_offset , data -> cq_offset );
3355+ dev_dbg (hdev -> dev , "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n" ,
3356+ data -> interrupt -> interrupt_id , data -> ts_handle ,
3357+ data -> ts_offset , data -> cq_offset );
33593358
3360- data -> buf = hl_mmap_mem_buf_get (data -> mmg , data -> ts_handle );
3361- if (!data -> buf ) {
3362- rc = - EINVAL ;
3363- goto put_cq_cb ;
3364- }
3359+ data -> buf = hl_mmap_mem_buf_get (data -> mmg , data -> ts_handle );
3360+ if (!data -> buf ) {
3361+ rc = - EINVAL ;
3362+ goto put_cq_cb ;
3363+ }
33653364
3366- spin_lock_irqsave (& data -> interrupt -> wait_list_lock , data -> flags );
3365+ spin_lock_irqsave (& data -> interrupt -> ts_list_lock , flags );
33673366
3368- /* get ts buffer record */
3369- rc = ts_get_and_handle_kernel_record (hdev , ctx , data , & pend );
3370- if (rc ) {
3371- spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , data -> flags );
3372- goto put_ts_buff ;
3373- }
3374- } else {
3375- pend = kzalloc (sizeof (* pend ), GFP_KERNEL );
3376- if (!pend ) {
3377- rc = - ENOMEM ;
3378- goto put_cq_cb ;
3379- }
3380- hl_fence_init (& pend -> fence , ULONG_MAX );
3381- pend -> cq_kernel_addr = (u64 * ) data -> cq_cb -> kernel_address + data -> cq_offset ;
3382- pend -> cq_target_value = data -> target_value ;
3383- spin_lock_irqsave (& data -> interrupt -> wait_list_lock , data -> flags );
3367+ /* get ts buffer record */
3368+ rc = ts_get_and_handle_kernel_record (hdev , ctx , data , & flags , & pend );
3369+ if (rc ) {
3370+ spin_unlock_irqrestore (& data -> interrupt -> ts_list_lock , flags );
3371+ goto put_ts_buff ;
33843372 }
33853373
33863374 /* We check for completion value as interrupt could have been received
3387- * before we add the wait/ timestamp node to the wait list.
3375+ * before we add the timestamp node to the ts list.
33883376 */
33893377 if (* pend -> cq_kernel_addr >= data -> target_value ) {
3390- spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , data -> flags );
3378+ spin_unlock_irqrestore (& data -> interrupt -> ts_list_lock , flags );
33913379
3392- if (register_ts_record ) {
3393- dev_dbg (hdev -> dev , "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n" ,
3394- pend , data -> ts_offset , data -> interrupt -> interrupt_id );
3395- pend -> ts_reg_info .in_use = false;
3396- }
3380+ dev_dbg (hdev -> dev , "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n" ,
3381+ pend , data -> ts_offset , data -> interrupt -> interrupt_id );
33973382
3383+ pend -> ts_reg_info .in_use = 0 ;
33983384 * status = HL_WAIT_CS_STATUS_COMPLETED ;
3385+ * pend -> ts_reg_info .timestamp_kernel_addr = ktime_get_ns ();
3386+
3387+ goto put_ts_buff ;
3388+ }
3389+
3390+ list_add_tail (& pend -> list_node , & data -> interrupt -> ts_list_head );
3391+ spin_unlock_irqrestore (& data -> interrupt -> ts_list_lock , flags );
3392+
3393+ rc = * status = HL_WAIT_CS_STATUS_COMPLETED ;
3394+
3395+ hl_ctx_put (ctx );
3396+
3397+ return rc ;
3398+
3399+ put_ts_buff :
3400+ hl_mmap_mem_buf_put (data -> buf );
3401+ put_cq_cb :
3402+ hl_cb_put (data -> cq_cb );
3403+ put_ctx :
3404+ hl_ctx_put (ctx );
3405+
3406+ return rc ;
3407+ }
3408+
3409+ static int _hl_interrupt_wait_ioctl (struct hl_device * hdev , struct hl_ctx * ctx ,
3410+ struct wait_interrupt_data * data ,
3411+ u32 * status , u64 * timestamp )
3412+ {
3413+ struct hl_user_pending_interrupt * pend ;
3414+ unsigned long timeout , flags ;
3415+ long completion_rc ;
3416+ int rc = 0 ;
3417+
3418+ timeout = hl_usecs64_to_jiffies (data -> intr_timeout_us );
3419+
3420+ hl_ctx_get (ctx );
3421+
3422+ data -> cq_cb = hl_cb_get (data -> mmg , data -> cq_handle );
3423+ if (!data -> cq_cb ) {
3424+ rc = - EINVAL ;
3425+ goto put_ctx ;
3426+ }
3427+
3428+ /* Validate the cq offset */
3429+ if (((u64 * ) data -> cq_cb -> kernel_address + data -> cq_offset ) >=
3430+ ((u64 * ) data -> cq_cb -> kernel_address + (data -> cq_cb -> size / sizeof (u64 )))) {
3431+ rc = - EINVAL ;
3432+ goto put_cq_cb ;
3433+ }
3434+
3435+ pend = kzalloc (sizeof (* pend ), GFP_KERNEL );
3436+ if (!pend ) {
3437+ rc = - ENOMEM ;
3438+ goto put_cq_cb ;
3439+ }
3440+
3441+ hl_fence_init (& pend -> fence , ULONG_MAX );
3442+ pend -> cq_kernel_addr = (u64 * ) data -> cq_cb -> kernel_address + data -> cq_offset ;
3443+ pend -> cq_target_value = data -> target_value ;
3444+ spin_lock_irqsave (& data -> interrupt -> wait_list_lock , flags );
3445+
3446+
3447+ /* We check for completion value as interrupt could have been received
3448+ * before we add the wait node to the wait list.
3449+ */
3450+ if (* pend -> cq_kernel_addr >= data -> target_value || (!data -> intr_timeout_us )) {
3451+ spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , flags );
3452+
3453+ if (* pend -> cq_kernel_addr >= data -> target_value )
3454+ * status = HL_WAIT_CS_STATUS_COMPLETED ;
3455+ else
3456+ * status = HL_WAIT_CS_STATUS_BUSY ;
33993457
3400- if (register_ts_record ) {
3401- * pend -> ts_reg_info .timestamp_kernel_addr = ktime_get_ns ();
3402- goto put_ts_buff ;
3403- } else {
3404- pend -> fence .timestamp = ktime_get ();
3405- goto set_timestamp ;
3406- }
3407- } else if (!data -> intr_timeout_us ) {
3408- spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , data -> flags );
3409- * status = HL_WAIT_CS_STATUS_BUSY ;
34103458 pend -> fence .timestamp = ktime_get ();
34113459 goto set_timestamp ;
34123460 }
@@ -3417,13 +3465,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
34173465 * in order to shorten the list pass loop, since
34183466 * same list could have nodes for different cq counter handle.
34193467 */
3420- list_add_tail (& pend -> wait_list_node , & data -> interrupt -> wait_list_head );
3421- spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , data -> flags );
3422-
3423- if (register_ts_record ) {
3424- rc = * status = HL_WAIT_CS_STATUS_COMPLETED ;
3425- goto ts_registration_exit ;
3426- }
3468+ list_add_tail (& pend -> list_node , & data -> interrupt -> wait_list_head );
3469+ spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , flags );
34273470
34283471 /* Wait for interrupt handler to signal completion */
34293472 completion_rc = wait_for_completion_interruptible_timeout (& pend -> fence .completion ,
@@ -3462,21 +3505,18 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
34623505 * for ts record, the node will be deleted in the irq handler after
34633506 * we reach the target value.
34643507 */
3465- spin_lock_irqsave (& data -> interrupt -> wait_list_lock , data -> flags );
3466- list_del (& pend -> wait_list_node );
3467- spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , data -> flags );
3508+ spin_lock_irqsave (& data -> interrupt -> wait_list_lock , flags );
3509+ list_del (& pend -> list_node );
3510+ spin_unlock_irqrestore (& data -> interrupt -> wait_list_lock , flags );
34683511
34693512set_timestamp :
34703513 * timestamp = ktime_to_ns (pend -> fence .timestamp );
34713514 kfree (pend );
34723515 hl_cb_put (data -> cq_cb );
3473- ts_registration_exit :
34743516 hl_ctx_put (ctx );
34753517
34763518 return rc ;
34773519
3478- put_ts_buff :
3479- hl_mmap_mem_buf_put (data -> buf );
34803520put_cq_cb :
34813521 hl_cb_put (data -> cq_cb );
34823522put_ctx :
@@ -3513,7 +3553,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
35133553 * handler to monitor
35143554 */
35153555 spin_lock (& interrupt -> wait_list_lock );
3516- list_add_tail (& pend -> wait_list_node , & interrupt -> wait_list_head );
3556+ list_add_tail (& pend -> list_node , & interrupt -> wait_list_head );
35173557 spin_unlock (& interrupt -> wait_list_lock );
35183558
35193559 /* We check for completion value as interrupt could have been received
@@ -3590,7 +3630,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
35903630
35913631remove_pending_user_interrupt :
35923632 spin_lock (& interrupt -> wait_list_lock );
3593- list_del (& pend -> wait_list_node );
3633+ list_del (& pend -> list_node );
35943634 spin_unlock (& interrupt -> wait_list_lock );
35953635
35963636 * timestamp = ktime_to_ns (pend -> fence .timestamp );
@@ -3649,16 +3689,6 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
36493689 return - EINVAL ;
36503690 }
36513691
3652- /*
3653- * Allow only one registration at a time. this is needed in order to prevent issues
3654- * while handling the flow of re-use of the same offset.
3655- * Since the registration flow is protected only by the interrupt lock, re-use flow
3656- * might request to move ts node to another interrupt list, and in such case we're
3657- * not protected.
3658- */
3659- if (args -> in .flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT )
3660- mutex_lock (& hpriv -> ctx -> ts_reg_lock );
3661-
36623692 if (args -> in .flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ ) {
36633693 struct wait_interrupt_data wait_intr_data = {0 };
36643694
@@ -3671,19 +3701,30 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
36713701 wait_intr_data .target_value = args -> in .target ;
36723702 wait_intr_data .intr_timeout_us = args -> in .interrupt_timeout_us ;
36733703
3674- rc = _hl_interrupt_wait_ioctl (hdev , hpriv -> ctx , & wait_intr_data ,
3675- !!(args -> in .flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT ),
3676- & status , & timestamp );
3704+ if (args -> in .flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT ) {
3705+ /*
3706+ * Allow only one registration at a time. this is needed in order to prevent
3707+ * issues while handling the flow of re-use of the same offset.
3708+ * Since the registration flow is protected only by the interrupt lock,
3709+ * re-use flow might request to move ts node to another interrupt list,
3710+ * and in such case we're not protected.
3711+ */
3712+ mutex_lock (& hpriv -> ctx -> ts_reg_lock );
3713+
3714+ rc = _hl_interrupt_ts_reg_ioctl (hdev , hpriv -> ctx , & wait_intr_data ,
3715+ & status , & timestamp );
3716+
3717+ mutex_unlock (& hpriv -> ctx -> ts_reg_lock );
3718+ } else
3719+ rc = _hl_interrupt_wait_ioctl (hdev , hpriv -> ctx , & wait_intr_data ,
3720+ & status , & timestamp );
36773721 } else {
36783722 rc = _hl_interrupt_wait_ioctl_user_addr (hdev , hpriv -> ctx ,
36793723 args -> in .interrupt_timeout_us , args -> in .addr ,
36803724 args -> in .target , interrupt , & status ,
36813725 & timestamp );
36823726 }
36833727
3684- if (args -> in .flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT )
3685- mutex_unlock (& hpriv -> ctx -> ts_reg_lock );
3686-
36873728 if (rc )
36883729 return rc ;
36893730
0 commit comments