Skip to content

Commit b043220

Browse files
metze-sambasmfrench
authored and committed
smb: client: let destroy_mr_list() keep smbdirect_mr_io memory if registered
If a smbdirect_mr_io structure if still visible to callers of smbd_register_mr() we can't free the related memory when the connection is disconnected! Otherwise smbd_deregister_mr() will crash. Now we use a mutex and refcounting in order to keep the memory around if the connection is disconnected. It means smbd_deregister_mr() can be called at any later time to free the memory, which is no longer referenced by nor referencing the connection. It also means smbd_destroy() no longer needs to wait for mr_io.used.count to become 0. Fixes: 050b8c3 ("smbd: Make upper layer decide when to destroy the transport") Cc: Steve French <smfrench@gmail.com> Cc: Tom Talpey <tom@talpey.com> Cc: Long Li <longli@microsoft.com> Cc: Namjae Jeon <linkinjeon@kernel.org> Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher <metze@samba.org> Signed-off-by: Steve French <stfrench@microsoft.com>
1 parent 1ef0e16 commit b043220

1 file changed

Lines changed: 127 additions & 19 deletions

File tree

fs/smb/client/smbdirect.c

Lines changed: 127 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
16241624
log_rdma_event(INFO, "free receive buffers\n");
16251625
destroy_receive_buffers(sc);
16261626

1627-
/*
1628-
* For performance reasons, memory registration and deregistration
1629-
* are not locked by srv_mutex. It is possible some processes are
1630-
* blocked on transport srv_mutex while holding memory registration.
1631-
* Release the transport srv_mutex to allow them to hit the failure
1632-
* path when sending data, and then release memory registrations.
1633-
*/
16341627
log_rdma_event(INFO, "freeing mr list\n");
1635-
while (atomic_read(&sc->mr_io.used.count)) {
1636-
cifs_server_unlock(server);
1637-
msleep(1000);
1638-
cifs_server_lock(server);
1639-
}
16401628
destroy_mr_list(sc);
16411629

16421630
ib_free_cq(sc->ib.send_cq);
@@ -2352,6 +2340,46 @@ static void smbd_mr_recovery_work(struct work_struct *work)
23522340
}
23532341
}
23542342

2343+
static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr)
2344+
{
2345+
struct smbdirect_socket *sc = mr->socket;
2346+
2347+
lockdep_assert_held(&mr->mutex);
2348+
2349+
if (mr->state == SMBDIRECT_MR_DISABLED)
2350+
return;
2351+
2352+
if (mr->mr)
2353+
ib_dereg_mr(mr->mr);
2354+
if (mr->sgt.nents)
2355+
ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
2356+
kfree(mr->sgt.sgl);
2357+
2358+
mr->mr = NULL;
2359+
mr->sgt.sgl = NULL;
2360+
mr->sgt.nents = 0;
2361+
2362+
mr->state = SMBDIRECT_MR_DISABLED;
2363+
}
2364+
2365+
static void smbd_mr_free_locked(struct kref *kref)
2366+
{
2367+
struct smbdirect_mr_io *mr =
2368+
container_of(kref, struct smbdirect_mr_io, kref);
2369+
2370+
lockdep_assert_held(&mr->mutex);
2371+
2372+
/*
2373+
* smbd_mr_disable_locked() should already be called!
2374+
*/
2375+
if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
2376+
smbd_mr_disable_locked(mr);
2377+
2378+
mutex_unlock(&mr->mutex);
2379+
mutex_destroy(&mr->mutex);
2380+
kfree(mr);
2381+
}
2382+
23552383
static void destroy_mr_list(struct smbdirect_socket *sc)
23562384
{
23572385
struct smbdirect_mr_io *mr, *tmp;
@@ -2365,13 +2393,31 @@ static void destroy_mr_list(struct smbdirect_socket *sc)
23652393
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
23662394

23672395
list_for_each_entry_safe(mr, tmp, &all_list, list) {
2368-
if (mr->mr)
2369-
ib_dereg_mr(mr->mr);
2370-
if (mr->sgt.nents)
2371-
ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
2372-
kfree(mr->sgt.sgl);
2396+
mutex_lock(&mr->mutex);
2397+
2398+
smbd_mr_disable_locked(mr);
23732399
list_del(&mr->list);
2374-
kfree(mr);
2400+
mr->socket = NULL;
2401+
2402+
/*
2403+
* No kref_put_mutex() as it's already locked.
2404+
*
2405+
* If smbd_mr_free_locked() is called
2406+
* and the mutex is unlocked and mr is gone,
2407+
* in that case kref_put() returned 1.
2408+
*
2409+
* If kref_put() returned 0 we know that
2410+
* smbd_mr_free_locked() didn't
2411+
* run. Not by us nor by anyone else, as we
2412+
* still hold the mutex, so we need to unlock.
2413+
*
2414+
* If the mr is still registered it will
2415+
* be dangling (detached from the connection)
2416+
* waiting for smbd_deregister_mr() to be
2417+
* called in order to free the memory.
2418+
*/
2419+
if (!kref_put(&mr->kref, smbd_mr_free_locked))
2420+
mutex_unlock(&mr->mutex);
23752421
}
23762422
}
23772423

@@ -2402,6 +2448,9 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
24022448
goto kzalloc_mr_failed;
24032449
}
24042450

2451+
kref_init(&mr->kref);
2452+
mutex_init(&mr->mutex);
2453+
24052454
mr->mr = ib_alloc_mr(sc->ib.pd,
24062455
sc->mr_io.type,
24072456
sp->max_frmr_depth);
@@ -2434,6 +2483,7 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
24342483
kcalloc_sgl_failed:
24352484
ib_dereg_mr(mr->mr);
24362485
ib_alloc_mr_failed:
2486+
mutex_destroy(&mr->mutex);
24372487
kfree(mr);
24382488
kzalloc_mr_failed:
24392489
destroy_mr_list(sc);
@@ -2471,6 +2521,7 @@ static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc)
24712521
list_for_each_entry(ret, &sc->mr_io.all.list, list) {
24722522
if (ret->state == SMBDIRECT_MR_READY) {
24732523
ret->state = SMBDIRECT_MR_REGISTERED;
2524+
kref_get(&ret->kref);
24742525
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
24752526
atomic_dec(&sc->mr_io.ready.count);
24762527
atomic_inc(&sc->mr_io.used.count);
@@ -2535,6 +2586,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25352586
return NULL;
25362587
}
25372588

2589+
mutex_lock(&mr->mutex);
2590+
25382591
mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
25392592
mr->need_invalidate = need_invalidate;
25402593
mr->sgt.nents = 0;
@@ -2578,8 +2631,16 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25782631
* on the next ib_post_send when we actually send I/O to remote peer
25792632
*/
25802633
rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
2581-
if (!rc)
2634+
if (!rc) {
2635+
/*
2636+
* get_mr() gave us a reference
2637+
* via kref_get(&mr->kref), we keep that and let
2638+
* the caller use smbd_deregister_mr()
2639+
* to remove it again.
2640+
*/
2641+
mutex_unlock(&mr->mutex);
25822642
return mr;
2643+
}
25832644

25842645
log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
25852646
rc, reg_wr->key);
@@ -2596,6 +2657,25 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25962657

25972658
smbd_disconnect_rdma_connection(sc);
25982659

2660+
/*
2661+
* get_mr() gave us a reference
2662+
* via kref_get(&mr->kref), we need to remove it again
2663+
* on error.
2664+
*
2665+
* No kref_put_mutex() as it's already locked.
2666+
*
2667+
* If smbd_mr_free_locked() is called
2668+
* and the mutex is unlocked and mr is gone,
2669+
* in that case kref_put() returned 1.
2670+
*
2671+
* If kref_put() returned 0 we know that
2672+
* smbd_mr_free_locked() didn't
2673+
* run. Not by us nor by anyone else, as we
2674+
* still hold the mutex, so we need to unlock.
2675+
*/
2676+
if (!kref_put(&mr->kref, smbd_mr_free_locked))
2677+
mutex_unlock(&mr->mutex);
2678+
25992679
return NULL;
26002680
}
26012681

@@ -2624,6 +2704,15 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26242704
{
26252705
struct smbdirect_socket *sc = mr->socket;
26262706

2707+
mutex_lock(&mr->mutex);
2708+
if (mr->state == SMBDIRECT_MR_DISABLED)
2709+
goto put_kref;
2710+
2711+
if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
2712+
smbd_mr_disable_locked(mr);
2713+
goto put_kref;
2714+
}
2715+
26272716
if (mr->need_invalidate) {
26282717
struct ib_send_wr *wr = &mr->inv_wr;
26292718
int rc;
@@ -2640,6 +2729,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26402729
rc = ib_post_send(sc->ib.qp, wr, NULL);
26412730
if (rc) {
26422731
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
2732+
smbd_mr_disable_locked(mr);
26432733
smbd_disconnect_rdma_connection(sc);
26442734
goto done;
26452735
}
@@ -2671,6 +2761,24 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26712761
done:
26722762
if (atomic_dec_and_test(&sc->mr_io.used.count))
26732763
wake_up(&sc->mr_io.cleanup.wait_queue);
2764+
2765+
put_kref:
2766+
/*
2767+
* No kref_put_mutex() as it's already locked.
2768+
*
2769+
* If smbd_mr_free_locked() is called
2770+
* and the mutex is unlocked and mr is gone,
2771+
* in that case kref_put() returned 1.
2772+
*
2773+
* If kref_put() returned 0 we know that
2774+
* smbd_mr_free_locked() didn't
2775+
* run. Not by us nor by anyone else, as we
2776+
* still hold the mutex, so we need to unlock
2777+
* and keep the mr in SMBDIRECT_MR_READY or
2778+
* SMBDIRECT_MR_ERROR state.
2779+
*/
2780+
if (!kref_put(&mr->kref, smbd_mr_free_locked))
2781+
mutex_unlock(&mr->mutex);
26742782
}
26752783

26762784
static bool smb_set_sge(struct smb_extract_to_rdma *rdma,

0 commit comments

Comments
 (0)