@@ -95,8 +95,6 @@ struct ucma_context {
9595 u64 uid ;
9696
9797 struct list_head list ;
98- /* sync between removal event and id destroy, protected by file mut */
99- int destroying ;
10098 struct work_struct close_work ;
10199};
102100
@@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table);
122120static DEFINE_XARRAY_ALLOC (multicast_table );
123121
124122static const struct file_operations ucma_fops ;
125- static int __destroy_id (struct ucma_context * ctx );
123+ static int ucma_destroy_private_ctx (struct ucma_context * ctx );
126124
127125static inline struct ucma_context * _ucma_find_context (int id ,
128126 struct ucma_file * file )
@@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work)
179177
180178 /* once all inflight tasks are finished, we close all underlying
181179 * resources. The context is still alive till its explicit destroying
182- * by its creator.
180+ * by its creator. This puts back the xarray's reference.
183181 */
184182 ucma_put_ctx (ctx );
185183 wait_for_completion (& ctx -> comp );
186184 /* No new events will be generated after destroying the id. */
187185 rdma_destroy_id (ctx -> cm_id );
188186
189- /*
190- * At this point ctx->ref is zero so the only place the ctx can be is in
191- * a uevent or in __destroy_id(). Since the former doesn't touch
192- * ctx->cm_id and the latter sync cancels this, there is no races with
193- * this store.
194- */
187+ /* Reading the cm_id without holding a positive ref is not allowed */
195188 ctx -> cm_id = NULL ;
196189}
197190
@@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
204197 return NULL ;
205198
206199 INIT_WORK (& ctx -> close_work , ucma_close_id );
207- refcount_set (& ctx -> ref , 1 );
208200 init_completion (& ctx -> comp );
209201 /* So list_del() will work if we don't do ucma_finish_ctx() */
210202 INIT_LIST_HEAD (& ctx -> list );
@@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
218210 return ctx ;
219211}
220212
213+ static void ucma_set_ctx_cm_id (struct ucma_context * ctx ,
214+ struct rdma_cm_id * cm_id )
215+ {
216+ refcount_set (& ctx -> ref , 1 );
217+ ctx -> cm_id = cm_id ;
218+ }
219+
221220static void ucma_finish_ctx (struct ucma_context * ctx )
222221{
223222 lockdep_assert_held (& ctx -> file -> mut );
@@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
303302 ctx = ucma_alloc_ctx (listen_ctx -> file );
304303 if (!ctx )
305304 goto err_backlog ;
306- ctx -> cm_id = cm_id ;
305+ ucma_set_ctx_cm_id ( ctx , cm_id ) ;
307306
308307 uevent = ucma_create_uevent (listen_ctx , event );
309308 if (!uevent )
@@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
321320 return 0 ;
322321
323322err_alloc :
324- xa_erase (& ctx_table , ctx -> id );
325- kfree (ctx );
323+ ucma_destroy_private_ctx (ctx );
326324err_backlog :
327325 atomic_inc (& listen_ctx -> backlog );
328326 /* Returning error causes the new ID to be destroyed */
@@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
356354 wake_up_interruptible (& ctx -> file -> poll_wait );
357355 }
358356
359- if (event -> event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx -> destroying )
360- queue_work (system_unbound_wq , & ctx -> close_work );
357+ if (event -> event == RDMA_CM_EVENT_DEVICE_REMOVAL ) {
358+ xa_lock (& ctx_table );
359+ if (xa_load (& ctx_table , ctx -> id ) == ctx )
360+ queue_work (system_unbound_wq , & ctx -> close_work );
361+ xa_unlock (& ctx_table );
362+ }
361363 return 0 ;
362364}
363365
@@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
461463 ret = PTR_ERR (cm_id );
462464 goto err1 ;
463465 }
464- ctx -> cm_id = cm_id ;
466+ ucma_set_ctx_cm_id ( ctx , cm_id ) ;
465467
466468 resp .id = ctx -> id ;
467469 if (copy_to_user (u64_to_user_ptr (cmd .response ),
468470 & resp , sizeof (resp ))) {
469- xa_erase (& ctx_table , ctx -> id );
470- __destroy_id (ctx );
471+ ucma_destroy_private_ctx (ctx );
471472 return - EFAULT ;
472473 }
473474
@@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
477478 return 0 ;
478479
479480err1 :
480- xa_erase (& ctx_table , ctx -> id );
481- kfree (ctx );
481+ ucma_destroy_private_ctx (ctx );
482482 return ret ;
483483}
484484
@@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
516516 rdma_unlock_handler (mc -> ctx -> cm_id );
517517}
518518
519- /*
520- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
521- * this point, no new events will be reported from the hardware. However, we
522- * still need to cleanup the UCMA context for this ID. Specifically, there
523- * might be events that have not yet been consumed by the user space software.
524- * mutex. After that we release them as needed.
525- */
526- static int ucma_free_ctx (struct ucma_context * ctx )
519+ static int ucma_cleanup_ctx_events (struct ucma_context * ctx )
527520{
528521 int events_reported ;
529522 struct ucma_event * uevent , * tmp ;
530523 LIST_HEAD (list );
531524
532- ucma_cleanup_multicast (ctx );
533-
534- /* Cleanup events not yet reported to the user. */
525+ /* Cleanup events not yet reported to the user.*/
535526 mutex_lock (& ctx -> file -> mut );
536527 list_for_each_entry_safe (uevent , tmp , & ctx -> file -> event_list , list ) {
537- if (uevent -> ctx == ctx || uevent -> conn_req_ctx == ctx )
528+ if (uevent -> ctx != ctx )
529+ continue ;
530+
531+ if (uevent -> resp .event == RDMA_CM_EVENT_CONNECT_REQUEST &&
532+ xa_cmpxchg (& ctx_table , uevent -> conn_req_ctx -> id ,
533+ uevent -> conn_req_ctx , XA_ZERO_ENTRY ,
534+ GFP_KERNEL ) == uevent -> conn_req_ctx ) {
538535 list_move_tail (& uevent -> list , & list );
536+ continue ;
537+ }
538+ list_del (& uevent -> list );
539+ kfree (uevent );
539540 }
540541 list_del (& ctx -> list );
541542 events_reported = ctx -> events_reported ;
542543 mutex_unlock (& ctx -> file -> mut );
543544
544545 /*
545- * If this was a listening ID then any connections spawned from it
546- * that have not been delivered to userspace are cleaned up too.
547- * Must be done outside any locks.
546+ * If this was a listening ID then any connections spawned from it that
547+ * have not been delivered to userspace are cleaned up too. Must be done
548+ * outside any locks.
548549 */
549550 list_for_each_entry_safe (uevent , tmp , & list , list ) {
550- list_del (& uevent -> list );
551- if (uevent -> resp .event == RDMA_CM_EVENT_CONNECT_REQUEST &&
552- uevent -> conn_req_ctx != ctx )
553- __destroy_id (uevent -> conn_req_ctx );
551+ ucma_destroy_private_ctx (uevent -> conn_req_ctx );
554552 kfree (uevent );
555553 }
556-
557- mutex_destroy (& ctx -> mutex );
558- kfree (ctx );
559554 return events_reported ;
560555}
561556
562- static int __destroy_id (struct ucma_context * ctx )
557+ /*
558+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (i.e.
559+ * the ctx is not public to the user). This is either because:
560+ * - ucma_finish_ctx() hasn't been called
561+ * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
562+ */
563+ static int ucma_destroy_private_ctx (struct ucma_context * ctx )
563564{
565+ int events_reported ;
566+
564567 /*
565- * If the refcount is already 0 then ucma_close_id() has already
566- * destroyed the cm_id, otherwise holding the refcount keeps cm_id
567- * valid. Prevent queue_work() from being called.
568+ * Destroy the underlying cm_id. New work queuing is prevented now by
569+ * the removal from the xarray. Once the work is canceled ref will either
570+ * be 0 because the work ran to completion and consumed the ref from the
571+ * xarray, or it will be positive because we still have the ref from the
572+ * xarray. This can also be 0 in cases where cm_id was never set.
568573 */
569- if (refcount_inc_not_zero (& ctx -> ref )) {
570- rdma_lock_handler (ctx -> cm_id );
571- ctx -> destroying = 1 ;
572- rdma_unlock_handler (ctx -> cm_id );
573- ucma_put_ctx (ctx );
574- }
575-
576574 cancel_work_sync (& ctx -> close_work );
577- /* At this point it's guaranteed that there is no inflight closing task */
578- if (ctx -> cm_id )
575+ if (refcount_read (& ctx -> ref ))
579576 ucma_close_id (& ctx -> close_work );
580- return ucma_free_ctx (ctx );
577+
578+ events_reported = ucma_cleanup_ctx_events (ctx );
579+ ucma_cleanup_multicast (ctx );
580+
581+ WARN_ON (xa_cmpxchg (& ctx_table , ctx -> id , XA_ZERO_ENTRY , NULL ,
582+ GFP_KERNEL ) != NULL );
583+ mutex_destroy (& ctx -> mutex );
584+ kfree (ctx );
585+ return events_reported ;
581586}
582587
583588static ssize_t ucma_destroy_id (struct ucma_file * file , const char __user * inbuf ,
@@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
596601
597602 xa_lock (& ctx_table );
598603 ctx = _ucma_find_context (cmd .id , file );
599- if (!IS_ERR (ctx ))
600- __xa_erase (& ctx_table , ctx -> id );
604+ if (!IS_ERR (ctx )) {
605+ if (__xa_cmpxchg (& ctx_table , ctx -> id , ctx , XA_ZERO_ENTRY ,
606+ GFP_KERNEL ) != ctx )
607+ ctx = ERR_PTR (- ENOENT );
608+ }
601609 xa_unlock (& ctx_table );
602610
603611 if (IS_ERR (ctx ))
604612 return PTR_ERR (ctx );
605613
606- resp .events_reported = __destroy_id (ctx );
614+ resp .events_reported = ucma_destroy_private_ctx (ctx );
607615 if (copy_to_user (u64_to_user_ptr (cmd .response ),
608616 & resp , sizeof (resp )))
609617 ret = - EFAULT ;
@@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp)
17771785 * prevented by this being a FD release function. The list_add_tail() in
17781786 * ucma_connect_event_handler() can run concurrently, however it only
17791787 * adds to the list *after* a listening ID. By only reading the first of
1780- * the list, and relying on __destroy_id () to block
1788+ * the list, and relying on ucma_destroy_private_ctx () to block
17811789 * ucma_connect_event_handler(), no additional locking is needed.
17821790 */
17831791 while (!list_empty (& file -> ctx_list )) {
17841792 struct ucma_context * ctx = list_first_entry (
17851793 & file -> ctx_list , struct ucma_context , list );
17861794
1787- xa_erase (& ctx_table , ctx -> id );
1788- __destroy_id (ctx );
1795+ WARN_ON (xa_cmpxchg (& ctx_table , ctx -> id , ctx , XA_ZERO_ENTRY ,
1796+ GFP_KERNEL ) != ctx );
1797+ ucma_destroy_private_ctx (ctx );
17891798 }
17901799 kfree (file );
17911800 return 0 ;
0 commit comments