diff --git a/src/wh_client.c b/src/wh_client.c
index 5c22f58ae..f577bb993 100644
--- a/src/wh_client.c
+++ b/src/wh_client.c
@@ -1430,30 +1430,48 @@ int wh_Client_KeyCacheDmaRequest(whClientContext* c, uint32_t flags,
                                  const void* keyAddr, uint16_t keySz,
                                  uint16_t keyId)
 {
-    int                                ret;
-    whMessageKeystore_CacheDmaRequest* req = NULL;
-    uintptr_t                          keyAddrPtr = 0;
-    uint16_t                           capSz      = 0;
+    int                                ret             = WH_ERROR_OK;
+    whMessageKeystore_CacheDmaRequest* req             = NULL;
+    uintptr_t                          keyAddrPtr      = 0;
+    uint16_t                           capSz           = 0;
+    int                                keyAddrAcquired = 0;
 
     if (c == NULL || (labelSz > 0 && label == NULL)) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req = (whMessageKeystore_CacheDmaRequest*)wh_CommClient_GetDataPtr(c->comm);
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
     memset(req, 0, sizeof(*req));
-    req->id      = keyId;
-    req->flags   = flags;
-    req->labelSz = 0;
-
-    /* Set up DMA buffer info */
+    req->id       = keyId;
+    req->flags    = flags;
+    req->labelSz  = 0;
     req->key.sz   = keySz;
-    ret           = wh_Client_DmaProcessClientAddress(
+    req->key.addr = 0;
+
+    /* Clear the slot up front so a skipped PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the input key buffer. POST runs in the Response, not here:
+     * the server reads the buffer between request and response, so an
+     * in-request POST would free the scratch too early (use-after-free). */
+    ret = wh_Client_DmaProcessClientAddress(
         c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
         WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
-    req->key.addr = keyAddrPtr;
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    }
 
     /* Copy label if provided, truncate if necessary */
     if (labelSz > 0 && label != NULL) {
@@ -1467,9 +1485,10 @@ int wh_Client_KeyCacheDmaRequest(whClientContext* c, uint32_t flags,
                                     sizeof(*req), (uint8_t*)req);
     }
 
-    (void)wh_Client_DmaProcessClientAddress(
-        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
-        WH_DMA_OPER_CLIENT_READ_POST, (whDmaFlags){0});
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        /* SendRequest failed: the Response will not run, so POST now. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
     return ret;
 }
 
@@ -1492,6 +1511,9 @@ int wh_Client_KeyCacheDmaResponse(whClientContext* c, uint16_t* keyId)
     }
 
     ret = wh_Client_RecvResponse(c, &group, &action, &size, (uint8_t*)resp);
+    if (ret == WH_ERROR_NOTREADY) {
+        return ret;
+    }
 
     if (ret == 0) {
         /* Validate response */
@@ -1510,6 +1532,15 @@ int wh_Client_KeyCacheDmaResponse(whClientContext* c, uint16_t* keyId)
             }
         }
     }
+
+    /* POST cleanup: release the mapping once the server has read it. Surface a
+     * POST failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (ret == WH_ERROR_OK) {
+            ret = postRc;
+        }
+    }
     return ret;
 }
 
@@ -1531,23 +1562,56 @@ int wh_Client_KeyCacheDma(whClientContext* c, uint32_t flags, uint8_t* label,
 int wh_Client_KeyExportDmaRequest(whClientContext* c, uint16_t keyId,
                                   const void* keyAddr, uint16_t keySz)
 {
-    whMessageKeystore_ExportDmaRequest* req = NULL;
+    whMessageKeystore_ExportDmaRequest* req             = NULL;
+    uintptr_t                           keyAddrPtr      = 0;
+    int                                 ret             = WH_ERROR_OK;
+    int                                 keyAddrAcquired = 0;
 
     if (c == NULL || keyId == WH_KEYID_ERASED) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req =
         (whMessageKeystore_ExportDmaRequest*)wh_CommClient_GetDataPtr(c->comm);
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
+    /* Fill the request; key.addr stays 0 unless the PRE below succeeds. */
     req->id       = keyId;
-    req->key.addr = (uint64_t)((uintptr_t)keyAddr);
+    req->key.addr = 0;
     req->key.sz   = keySz;
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY, WH_KEY_EXPORT_DMA,
-                                 sizeof(*req), (uint8_t*)req);
+    /* Clear the slot up front so a skipped PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output key buffer; the server fills it and the
+     * Response POST copies the result back and releases it. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
+        WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+    }
+    /* Send only when the PRE translation succeeded. */
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY, WH_KEY_EXPORT_DMA,
+                                    sizeof(*req), (uint8_t*)req);
+    }
+
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        /* SendRequest failed: the Response will not run, so POST now. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
@@ -1571,6 +1635,9 @@ int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size,
                                 (uint8_t*)resp);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_KEY) ||
@@ -1595,6 +1662,15 @@ int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
             }
         }
     }
+
+    /* POST cleanup: copy results back and release the mapping; surface a POST
+     * failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
@@ -1616,11 +1692,18 @@ int wh_Client_KeyExportPublicDmaRequest(whClientContext* c, whKeyId keyId,
                                         uint16_t algo, void* keyAddr,
                                         uint16_t keySz)
 {
-    whMessageKeystore_ExportPublicDmaRequest* req = NULL;
+    whMessageKeystore_ExportPublicDmaRequest* req             = NULL;
+    uintptr_t                                 keyAddrPtr      = 0;
+    int                                       ret             = WH_ERROR_OK;
+    int                                       keyAddrAcquired = 0;
 
     if (c == NULL || keyId == WH_KEYID_ERASED) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req =
         (whMessageKeystore_ExportPublicDmaRequest*)wh_CommClient_GetDataPtr(
@@ -1628,14 +1711,38 @@ int wh_Client_KeyExportPublicDmaRequest(whClientContext* c, whKeyId keyId,
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
+
     req->id       = keyId;
     req->algo     = algo;
-    req->key.addr = (uint64_t)((uintptr_t)keyAddr);
+    req->key.addr = 0;
     req->key.sz   = keySz;
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY,
-                                 WH_KEY_EXPORT_PUBLIC_DMA, sizeof(*req),
-                                 (uint8_t*)req);
+    /* Clear the slot up front so a skipped PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output public key buffer; see KeyExportDmaRequest. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
+        WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+    }
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY,
+                                    WH_KEY_EXPORT_PUBLIC_DMA, sizeof(*req),
+                                    (uint8_t*)req);
+    }
+
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
@@ -1660,6 +1767,9 @@ int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size,
                                 (uint8_t*)resp);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         if (resp_size != sizeof(*resp)) {
             rc = WH_ERROR_ABORTED;
@@ -1679,6 +1789,14 @@ int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
             }
         }
     }
+
+    /* POST cleanup; see KeyExportDmaResponse. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
diff --git a/src/wh_client_dma.c b/src/wh_client_dma.c
index 7dee4bef6..ecaa35331 100644
--- a/src/wh_client_dma.c
+++ b/src/wh_client_dma.c
@@ -36,10 +36,12 @@
 int wh_Client_DmaRegisterAllowList(whClientContext*          client,
                                    const whDmaAddrAllowList* allowlist)
 {
-    if (NULL == client || NULL == allowlist) {
+    if (NULL == client) {
         return WH_ERROR_BADARGS;
     }
 
+    /* A NULL allowlist clears any previously registered list (no enforcement),
+     * symmetric with wh_Client_DmaRegisterCb(NULL). */
     client->dma.dmaAddrAllowList = allowlist;
 
     return WH_ERROR_OK;
@@ -94,4 +96,23 @@ int wh_Client_DmaProcessClientAddress(whClientContext* client,
     }
     return rc;
 }
+
+int wh_Client_DmaAsyncPost(whClientContext* client, whClientDmaAsyncBuf* buf)
+{
+    int       rc;
+    uintptr_t addr;
+    /* Unset slot (sz == 0) or NULL argument: nothing to POST, report OK. */
+    if (client == NULL || buf == NULL || buf->sz == 0) {
+        return WH_ERROR_OK;
+    }
+    /* Replay the recorded POST operation with the slot's saved addresses. */
+    addr = buf->xformedAddr;
+    rc   = wh_Client_DmaProcessClientAddress(client, buf->clientAddr,
+                                             (void**)&addr, (size_t)buf->sz,
+                                             buf->postOper, (whDmaFlags){0});
+    /* Clear the slot even on failure so a later Response cannot re-run the
+     * POST; the failure is returned to the caller. */
+    buf->sz = 0;
+    return rc;
+}
 #endif /* WOLFHSM_CFG_DMA */
diff --git a/src/wh_client_nvm.c b/src/wh_client_nvm.c
index 5742b97c7..1327275c5 100644
--- a/src/wh_client_nvm.c
+++ b/src/wh_client_nvm.c
@@ -695,19 +695,67 @@ int wh_Client_NvmAddObjectDmaRequest(whClientContext* c,
                                      whNvmMetadata*   metadata,
                                      whNvmSize data_len, const uint8_t* data)
 {
-    whMessageNvm_AddObjectDmaRequest msg = {0};
+    whMessageNvm_AddObjectDmaRequest msg         = {0};
+    uintptr_t                        metaAddrPtr = 0;
+    uintptr_t                        dataAddrPtr = 0;
+    int                              ret         = WH_ERROR_OK;
 
     if (c == NULL) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
+
+    /* Clear both slots up front: a metadata-only object leaves the data slot
+     * unset, and the Response must not POST a stale (shared-union) size. */
+    c->dma.asyncCtx.nvmAdd.meta.sz = 0;
+    c->dma.asyncCtx.nvmAdd.data.sz = 0;
+
+    /* PRE-translate the metadata struct (fixed size) and the optional data
+     * buffer; the matching Response POST releases them. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)metadata, (void**)&metaAddrPtr, sizeof(whNvmMetadata),
+        WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        c->dma.asyncCtx.nvmAdd.meta.xformedAddr = metaAddrPtr;
+        c->dma.asyncCtx.nvmAdd.meta.clientAddr  = (uintptr_t)metadata;
+        c->dma.asyncCtx.nvmAdd.meta.sz          = sizeof(whNvmMetadata);
+        c->dma.asyncCtx.nvmAdd.meta.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    }
+
+    if (ret == WH_ERROR_OK && data != NULL && data_len > 0) {
+        ret = wh_Client_DmaProcessClientAddress(
+            c, (uintptr_t)data, (void**)&dataAddrPtr, data_len,
+            WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
+        if (ret == WH_ERROR_OK) {
+            c->dma.asyncCtx.nvmAdd.data.xformedAddr = dataAddrPtr;
+            c->dma.asyncCtx.nvmAdd.data.clientAddr  = (uintptr_t)data;
+            c->dma.asyncCtx.nvmAdd.data.sz          = data_len;
+            c->dma.asyncCtx.nvmAdd.data.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+        }
+    }
 
-    msg.metadata_hostaddr = (uint64_t)(uintptr_t)metadata;
-    msg.data_hostaddr     = (uint64_t)(uintptr_t)data;
-    msg.data_len          = data_len;
+    msg.metadata_hostaddr = (uint64_t)metaAddrPtr;
+    /* 0 when there is no data buffer to DMA (dataAddrPtr is set only by the
+     * data PRE); never forward a raw, untranslated client pointer. */
+    msg.data_hostaddr = (uint64_t)dataAddrPtr;
+    msg.data_len      = data_len;
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
+                                    WH_MESSAGE_NVM_ACTION_ADDOBJECTDMA,
+                                    sizeof(msg), &msg);
+    }
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
-                                 WH_MESSAGE_NVM_ACTION_ADDOBJECTDMA,
-                                 sizeof(msg), &msg);
+    if (ret != WH_ERROR_OK) {
+        /* Send/PRE failed: release whatever was acquired (helper no-ops on the
+         * unset slot), in reverse order. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.data);
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.meta);
+    }
+    return ret;
 }
 
 int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
@@ -723,6 +771,9 @@ int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
     }
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size, &msg);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_NVM) ||
@@ -738,6 +789,16 @@ int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
             }
         }
     }
+
+    /* POST cleanup for both slots, reverse acquisition order; surface a POST
+     * failure if the operation otherwise succeeded. */
+    {
+        int postData = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.data);
+        int postMeta = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.meta);
+        if (rc == WH_ERROR_OK) {
+            rc = (postData != WH_ERROR_OK) ? postData : postMeta;
+        }
+    }
     return rc;
 }
 
@@ -766,19 +827,53 @@ int wh_Client_NvmReadDmaRequest(whClientContext* c, whNvmId id,
                                 whNvmSize offset, whNvmSize data_len,
                                 uint8_t* data)
 {
-    whMessageNvm_ReadDmaRequest msg = {0};
+    whMessageNvm_ReadDmaRequest msg              = {0};
+    uintptr_t                   dataAddrPtr      = 0;
+    int                         ret              = WH_ERROR_OK;
+    int                         dataAddrAcquired = 0;
 
     if (c == NULL) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
+
+    /* Clear the slot up front so a skipped PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output data buffer (only when there is one); the server
+     * writes the NVM contents and the Response POST copies them back. Skipping
+     * the empty case keeps a raw, untranslated pointer out of the message. */
+    if (data != NULL && data_len > 0) {
+        ret = wh_Client_DmaProcessClientAddress(
+            c, (uintptr_t)data, (void**)&dataAddrPtr, data_len,
+            WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+        if (ret == WH_ERROR_OK) {
+            dataAddrAcquired                = 1;
+            c->dma.asyncCtx.buf.xformedAddr = dataAddrPtr;
+            c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)data;
+            c->dma.asyncCtx.buf.sz          = data_len;
+            c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+        }
+    }
 
     msg.id            = id;
     msg.offset        = offset;
     msg.data_len      = data_len;
-    msg.data_hostaddr = (uint64_t)(uintptr_t)data;
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
-                                 WH_MESSAGE_NVM_ACTION_READDMA, sizeof(msg),
-                                 &msg);
+    msg.data_hostaddr = (uint64_t)dataAddrPtr;
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
+                                    WH_MESSAGE_NVM_ACTION_READDMA, sizeof(msg),
+                                    &msg);
+    }
+
+    if (ret != WH_ERROR_OK && dataAddrAcquired) {
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
@@ -794,6 +889,9 @@ int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
     }
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size, &msg);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_NVM) ||
@@ -809,6 +907,15 @@ int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
             }
         }
     }
+
+    /* POST cleanup: copy the server's writes back and release the mapping;
+     * surface a POST failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
diff --git a/src/wh_server_dma.c b/src/wh_server_dma.c
index 023aeaddf..21c78384b 100644
--- a/src/wh_server_dma.c
+++ b/src/wh_server_dma.c
@@ -77,10 +77,12 @@ int wh_Server_DmaRegisterMemCopyCb(whServerContext* server,
 int wh_Server_DmaRegisterAllowList(whServerContext*                server,
                                    const whServerDmaAddrAllowList* allowlist)
 {
-    if (NULL == server || NULL == allowlist) {
+    if (NULL == server) {
         return WH_ERROR_BADARGS;
     }
 
+    /* A NULL allowlist clears any previously registered list (no enforcement),
+     * symmetric with wh_Server_DmaRegisterCb(NULL). */
     server->dma.dmaAddrAllowList = allowlist;
 
     return WH_ERROR_OK;
diff --git a/test/wh_test_clientserver.c b/test/wh_test_clientserver.c
index de8e66561..baacc9f58 100644
--- a/test/wh_test_clientserver.c
+++ b/test/wh_test_clientserver.c
@@ -24,6 +24,7 @@
 
 #include "wh_test_common.h"
 #include "wh_test_clientserver.h"
+#include "wh_test_dma.h"
 #include "wolfhsm/wh_error.h"
 
 #include "wolfhsm/wh_comm.h"
@@ -406,8 +407,203 @@ static int _testDma(whServerContext* server, whClientContext* client)
                               sizeof(testMem.srvBufAllow),
                               (whServerDmaFlags){0}));
 
+    /* Unregister the allow list: it points at this function's stack-local
+     * testMem, which is about to go out of scope. */
+    WH_TEST_RETURN_ON_FAIL(wh_Server_DmaRegisterAllowList(server, NULL));
+
     return rc;
 }
+
+/*
+ * Drive the keystore/NVM *Dma client APIs end-to-end through the shared
+ * bounce-pool translating DMA callback (see test/wh_test_dma.c). The server can
+ * only touch the pool, so any API that forgets to translate is rejected; the
+ * single-thread pump makes the old KeyCacheDma use-after-free deterministic.
+ * On failure control jumps to cleanup so the callbacks are always unregistered.
+ */
+#define BOUNCE_TEST_NVM_ID 0x4242 /* base id; the test also uses id + 1 */
+/* Local fail/assert helpers that unwind to cleanup instead of returning; the
+ * expanding function must provide an int rc and a cleanup: label. */
+#define BOUNCE_FAIL(expr)                          \
+    do {                                           \
+        if ((rc = (expr)) != WH_ERROR_OK) {        \
+            goto cleanup;                          \
+        }                                          \
+    } while (0)
+#define BOUNCE_ASSERT(cond)                        \
+    do {                                           \
+        if (!(cond)) {                             \
+            WH_ERROR_PRINT("bounce assert failed: %s (line %d)\n", #cond, \
+                           __LINE__);              \
+            rc = WH_ERROR_ABORTED;                 \
+            goto cleanup;                          \
+        }                                          \
+    } while (0)
+
+static int _testClientDmaBounce(whServerContext* server, whClientContext* client)
+{
+    int     rc        = WH_ERROR_OK;
+    int32_t server_rc = 0;
+
+    /* key material to cache + export back */
+    uint8_t  keyIn[32];
+    uint8_t  keyOut[32];
+    uint8_t  labelIn[WH_NVM_LABEL_LEN];
+    uint8_t  labelOut[WH_NVM_LABEL_LEN];
+    uint16_t keyIdIn  = (uint16_t)WH_KEYID_ERASED;
+    uint16_t keyIdOut = 0;
+    uint16_t keyOutSz = sizeof(keyOut);
+
+    /* NVM object to add (server reads) and read back (server writes) */
+    whNvmMetadata meta    = {0};
+    const char*   dataIn  = "bounce-pool-payload";
+    whNvmSize     dataLen = (whNvmSize)strlen(dataIn);
+    uint8_t       dataOut[64];
+
+    WH_TEST_PRINT(
+        "Testing client *Dma APIs through a translating DMA callback...\n");
+
+    whTestDma_BounceReset();
+    memset(keyIn, 0x5A, sizeof(keyIn));
+    memset(labelIn, 0, sizeof(labelIn));
+    (void)snprintf((char*)labelIn, sizeof(labelIn), "bounce-key");
+
+    meta.id     = BOUNCE_TEST_NVM_ID;
+    meta.access = WH_NVM_ACCESS_ANY;
+    meta.flags  = WH_NVM_FLAGS_NONE;
+    meta.len    = dataLen;
+    (void)snprintf((char*)meta.label, sizeof(meta.label), "bounce-obj");
+
+    /* From here on the server can only touch the bounce pool: the server
+     * callback rejects any address the client failed to translate. */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, whTestDma_BounceClientCb));
+    BOUNCE_FAIL(wh_Server_DmaRegisterCb(server, whTestDma_BounceServerCb));
+
+    /* --- NvmAddObjectDma: server READS metadata + data --- */
+    BOUNCE_FAIL(wh_Client_NvmAddObjectDmaRequest(client, &meta, dataLen,
+                                                 (const uint8_t*)dataIn));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmAddObjectDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- NvmReadDma: server WRITES the object's data back --- */
+    memset(dataOut, 0, sizeof(dataOut));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                            dataOut));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    BOUNCE_ASSERT(0 == memcmp(dataIn, dataOut, dataLen));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- NvmAddObjectDma metadata-only (data == NULL): the data slot is not
+     * populated here. Poison it first; a Request that fails to clear it makes
+     * the Response POST a stale mapping (a stray POST), caught below. --- */
+    client->dma.asyncCtx.nvmAdd.data.xformedAddr = (uintptr_t)0xBADC0DE;
+    client->dma.asyncCtx.nvmAdd.data.clientAddr  = (uintptr_t)0xBADC0DE;
+    client->dma.asyncCtx.nvmAdd.data.sz          = 1; /* would trigger a POST */
+    client->dma.asyncCtx.nvmAdd.data.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    {
+        whNvmMetadata metaOnly = {0};
+        whNvmId       moId     = (whNvmId)(BOUNCE_TEST_NVM_ID + 1);
+
+        metaOnly.id     = moId;
+        metaOnly.access = WH_NVM_ACCESS_ANY;
+        metaOnly.flags  = WH_NVM_FLAGS_NONE;
+        metaOnly.len    = 0;
+        (void)snprintf((char*)metaOnly.label, sizeof(metaOnly.label),
+                       "bounce-meta");
+
+        BOUNCE_FAIL(
+            wh_Client_NvmAddObjectDmaRequest(client, &metaOnly, 0, NULL));
+        BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+        BOUNCE_FAIL(wh_Client_NvmAddObjectDmaResponse(client, &server_rc));
+        BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+        BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+        BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+        BOUNCE_FAIL(wh_Client_NvmDestroyObjectsRequest(client, 1, &moId));
+        BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+        BOUNCE_FAIL(wh_Client_NvmDestroyObjectsResponse(client, &server_rc));
+        BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    }
+
+    /* --- Leak recovery on a PRE failure: a Request that fails after acquiring
+     * a mapping must release it. Inject an alloc failure and assert nothing is
+     * left outstanding. No request is sent (PRE fails first), so the comm stays
+     * idle for the cases below. --- */
+    whTestDma_BounceSetAllocBudget(0); /* first PRE fails: nothing acquired */
+    BOUNCE_ASSERT(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                              dataOut) != WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    whTestDma_BounceSetAllocBudget(1); /* meta PRE ok, data PRE fails */
+    BOUNCE_ASSERT(wh_Client_NvmAddObjectDmaRequest(
+                      client, &meta, dataLen, (const uint8_t*)dataIn) !=
+                  WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0); /* meta slot released */
+    whTestDma_BounceSetAllocBudget(-1);                /* restore unlimited */
+    BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+    /* --- KeyCacheDma (server READS) then KeyExportDma (server WRITES): the
+     * use-after-free guard. If KeyCacheDma POSTs inside the Request (the old
+     * bug), the key slot is poisoned before the server reads it, the server
+     * caches poison, and the exported key mismatches keyIn below. --- */
+    BOUNCE_FAIL(wh_Client_KeyCacheDmaRequest(client, 0, labelIn, sizeof(labelIn),
+                                             keyIn, sizeof(keyIn), keyIdIn));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_KeyCacheDmaResponse(client, &keyIdOut));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    memset(keyOut, 0, sizeof(keyOut));
+    memset(labelOut, 0, sizeof(labelOut));
+    BOUNCE_FAIL(wh_Client_KeyExportDmaRequest(client, keyIdOut, keyOut,
+                                              sizeof(keyOut)));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_KeyExportDmaResponse(client, labelOut,
+                                               sizeof(labelOut), &keyOutSz));
+    BOUNCE_ASSERT(keyOutSz == sizeof(keyIn));
+    BOUNCE_ASSERT(0 == memcmp(keyIn, keyOut, sizeof(keyIn)));
+    BOUNCE_ASSERT(0 == memcmp(labelIn, labelOut, sizeof(labelIn)));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- Teeth check: with the client callback removed, the raw client pointer
+     * is outside the pool, so the server callback must reject it. --- */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, NULL));
+    memset(dataOut, 0, sizeof(dataOut));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                            dataOut));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc != WH_ERROR_OK);
+
+    /* Remove the test object (normal-path teardown). */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, whTestDma_BounceClientCb));
+    BOUNCE_FAIL(wh_Client_NvmDestroyObjectsRequest(client, 1, &meta.id));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmDestroyObjectsResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+
+    /* No mapping was leaked and no stray/double POST occurred across the run. */
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+    BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+    WH_TEST_PRINT("Client *Dma translating-callback tests PASSED\n");
+
+cleanup:
+    /* Always disarm the alloc-failure budget and unregister both callbacks so
+     * a failure cannot leak fault injection or pool-only enforcement into
+     * later tests. The test object is removed on the normal path only: after
+     * a failure a request may be half-processed, so send no more traffic. */
+    whTestDma_BounceSetAllocBudget(-1);
+    (void)wh_Client_DmaRegisterCb(client, NULL);
+    (void)wh_Server_DmaRegisterCb(server, NULL);
+    return rc;
+}
+
+#undef BOUNCE_FAIL
+#undef BOUNCE_ASSERT
 #endif /* WOLFHSM_CFG_DMA && WOLFHSM_CFG_ENABLE_CLIENT && \
           WOLFHSM_CFG_ENABLE_SERVER */
 
@@ -1199,6 +1395,10 @@ int whTest_ClientServerSequential(whTestNvmBackendType nvmType)
 #ifdef WOLFHSM_CFG_DMA
     /* Test DMA callbacks and address allowlisting */
     WH_TEST_RETURN_ON_FAIL(_testDma(server, client));
+
+    /* Drive the client *Dma APIs through a translating callback so a missing
+     * translation is caught on POSIX, not just on cross-domain hardware. */
+    WH_TEST_RETURN_ON_FAIL(_testClientDmaBounce(server, client));
 #endif /* WOLFHSM_CFG_DMA */
 
     /* Check that we are still connected */
diff --git a/test/wh_test_crypto.c b/test/wh_test_crypto.c
index da9434743..bdede4dd3 100644
--- a/test/wh_test_crypto.c
+++ b/test/wh_test_crypto.c
@@ -63,6 +63,7 @@
 #include "wolfhsm/wh_crypto.h"
 
 #include "wh_test_common.h"
+#include "wh_test_dma.h"
 
 #if defined(WOLFHSM_CFG_TEST_POSIX)
 #include <unistd.h> /* For sleep */
@@ -14438,7 +14439,13 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     }};
 
 #ifdef WOLFHSM_CFG_DMA
-    whClientDmaConfig clientDmaConfig = {0};
+    /* Run every crypto/cert *Dma op through the bounce-pool callback so a
+     * missing translation is rejected (see test/wh_test_dma.c). Catches missing
+     * translation; the use-after-free class is covered by the single-thread
+     * harness. */
+    whClientDmaConfig clientDmaConfig = {
+        .cb = whTestDma_BounceClientCb,
+    };
 #endif
     whClientConfig c_conf[1] = {{
         .comm = cc_conf,
@@ -14479,16 +14486,30 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     /* Crypto context */
     whServerCryptoContext crypto[1] = {0};
 
+#ifdef WOLFHSM_CFG_DMA
+    /* Server may only touch the bounce pool; this callback rejects an
+     * untranslated client pointer. */
+    whServerDmaConfig serverDmaConfig = {
+        .cb = whTestDma_BounceServerCb,
+    };
+#endif
 
     whServerConfig s_conf[1] = {{
         .comm_config = cs_conf,
         .nvm         = nvm,
         .crypto      = crypto,
         .devId       = INVALID_DEVID,
+#ifdef WOLFHSM_CFG_DMA
+        .dmaConfig   = &serverDmaConfig,
+#endif
     }};
 
     WH_TEST_RETURN_ON_FAIL(wh_Nvm_Init(nvm, n_conf));
 
+#ifdef WOLFHSM_CFG_DMA
+    whTestDma_BounceReset();
+#endif
+
     ret = wolfCrypt_Init();
     if (ret == 0) {
         ret = wc_InitRng_ex(crypto->rng, NULL, INVALID_DEVID);
@@ -14497,6 +14518,22 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
         }
         else {
             _whClientServerThreadTest(c_conf, s_conf);
+#ifdef WOLFHSM_CFG_DMA
+            /* After the client thread joins, no mapping may be outstanding and
+             * no POST may have hit a stale/unknown slot. */
+            if (whTestDma_BounceOutstanding() != 0) {
+                WH_ERROR_PRINT("wh_test bounce: %d DMA mapping(s) leaked "
+                               "across the crypto suite\n",
+                               whTestDma_BounceOutstanding());
+                ret = WH_ERROR_ABORTED;
+            }
+            if (whTestDma_BounceStrayPosts() != 0) {
+                WH_ERROR_PRINT("wh_test bounce: %d stray/double DMA POST(s) "
+                               "across the crypto suite\n",
+                               whTestDma_BounceStrayPosts());
+                ret = WH_ERROR_ABORTED;
+            }
+#endif
         }
     }
     else {
@@ -14507,7 +14544,9 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     wc_FreeRng(crypto->rng);
     wolfCrypt_Cleanup();
 
-    return WH_ERROR_OK;
+    /* Propagate ret (was hard-coded WH_ERROR_OK): surfaces an init failure and
+     * the DMA bounce leak check instead of silently passing. */
+    return ret;
 }
 #endif /* WOLFHSM_CFG_TEST_POSIX */
 
diff --git a/test/wh_test_dma.c b/test/wh_test_dma.c
index d57e0bccd..88bd3db46 100644
--- a/test/wh_test_dma.c
+++ b/test/wh_test_dma.c
@@ -37,6 +37,230 @@
 #include "wh_test_common.h"
 #include "wh_test_dma.h"
 
+/*
+ * Shared "bounce-pool" translating DMA callback harness.
+ *
+ * Models a split-address-space port: the server can only reach a dedicated pool,
+ * not arbitrary client RAM. The client callback bounces each buffer through a
+ * pool slot and hands the server the pool address; the server callback rejects
+ * (WH_ERROR_ACCESS) any address outside the pool, so a *Dma API that forgot to
+ * translate is caught. Freed slots are poisoned, so a premature POST
+ * (use-after-free) corrupts the data, and a POST matching no live slot is
+ * counted as a stray/double POST.
+ *
+ * Missing translation is caught in both harnesses; the use-after-free class is
+ * deterministic only in the single-thread pump harness, where the client POST
+ * is ordered before the server read.
+ *
+ * Single-client only: the allocator is mutated only by the (serialized) client
+ * side; the server just reads/writes pool bytes, with happens-before provided
+ * by the request/response round-trip through the transport.
+ */
+struct whClientContext_t; /* opaque: callbacks only use the pointer */
+struct whServerContext_t; /* opaque: callbacks only use the pointer */
+
+/* Generous headroom for the few buffers one op maps concurrently (the largest
+ * being an ML-DSA-sized key); the pool recycles between ops. */
+#define BOUNCE_POOL_SIZE \
+    ((64 * 1024) + (8 * WOLFHSM_CFG_SERVER_KEYCACHE_BIG_BUFSIZE))
+#define BOUNCE_POOL_SLOTS 64
+#define BOUNCE_POISON_BYTE ((uint8_t)0xEF)
+
+typedef struct {
+    int       inUse; /* nonzero while the slot maps a live buffer */
+    uintptr_t base; /* address within g_bouncePool */
+    size_t    len; /* requested (unaligned) byte length of the slice */
+} bounceSlot;
+
+static uint8_t    g_bouncePool[BOUNCE_POOL_SIZE];
+static bounceSlot g_bounceSlots[BOUNCE_POOL_SLOTS];
+static size_t     g_bounceUsed;        /* bump offset into the pool */
+static int        g_bounceOutstanding; /* slots currently allocated */
+static int        g_bounceStrayPost;   /* len>0 POSTs with no matching slot */
+static int        g_bounceAllocBudget; /* allocs still allowed; <0 = unlimited */
+
+void whTestDma_BounceReset(void)
+{
+    g_bounceAllocBudget = -1; /* negative = unlimited allocations */
+    g_bounceStrayPost   = 0;
+    g_bounceOutstanding = 0;
+    g_bounceUsed        = 0; /* rewind the bump offset */
+    memset(g_bounceSlots, 0, sizeof(g_bounceSlots)); /* every slot free */
+    memset(g_bouncePool, BOUNCE_POISON_BYTE, sizeof(g_bouncePool));
+}
+
+int whTestDma_BounceOutstanding(void)
+{
+    return g_bounceOutstanding; /* +1 per slot alloc, -1 per slot free */
+}
+
+int whTestDma_BounceStrayPosts(void)
+{
+    return g_bounceStrayPost; /* len>0 POSTs that matched no live slot */
+}
+
+void whTestDma_BounceSetAllocBudget(int allocs)
+{
+    g_bounceAllocBudget = allocs; /* 0 fails the next alloc; <0 = unlimited */
+}
+
+/* Bump-allocate a len-byte slice (8-byte aligned); NULL when refused. */
+static bounceSlot* _bounceAlloc(size_t len)
+{
+    int    i;
+    size_t aligned = (len + 7u) & ~(size_t)7u; /* 8-byte align slices */
+
+    /* Injected failure for exercising leak-recovery paths; no diagnostic. */
+    if (g_bounceAllocBudget == 0) {
+        return NULL;
+    }
+    /* Overflow-safe bound; exhaustion usually means a leaked mapping. */
+    if (len > sizeof(g_bouncePool) ||
+        aligned > sizeof(g_bouncePool) - g_bounceUsed) {
+        WH_ERROR_PRINT("wh_test bounce: pool exhausted (used %u + %u > %u, "
+                       "%d outstanding); likely a leaked DMA mapping\n",
+                       (unsigned)g_bounceUsed, (unsigned)aligned,
+                       (unsigned)sizeof(g_bouncePool), g_bounceOutstanding);
+        return NULL;
+    }
+    for (i = 0; i < BOUNCE_POOL_SLOTS; i++) {
+        if (!g_bounceSlots[i].inUse) {
+            g_bounceSlots[i].inUse = 1;
+            g_bounceSlots[i].base  = (uintptr_t)&g_bouncePool[g_bounceUsed];
+            g_bounceSlots[i].len   = len;
+            g_bounceUsed += aligned;
+            g_bounceOutstanding++;
+            if (g_bounceAllocBudget > 0) {
+                g_bounceAllocBudget--;
+            }
+            return &g_bounceSlots[i];
+        }
+    }
+    WH_ERROR_PRINT("wh_test bounce: out of slots (%d); raise BOUNCE_POOL_SLOTS "
+                   "or check for a leaked mapping\n",
+                   BOUNCE_POOL_SLOTS);
+    return NULL;
+}
+
+static bounceSlot* _bounceFind(uintptr_t base)
+{
+    bounceSlot* s; /* cursor over the slot table */
+    for (s = g_bounceSlots; s < g_bounceSlots + BOUNCE_POOL_SLOTS; s++) {
+        if (s->inUse && s->base == base) {
+            return s; /* live slot whose slice starts exactly at base */
+        }
+    }
+    return NULL; /* nothing live at that address */
+}
+
+static void _bounceFree(bounceSlot* s)
+{
+    /* Scribble over the released bytes first: a consumer still holding the
+     * address after POST then reads poison instead of plausible data. */
+    memset((void*)s->base, BOUNCE_POISON_BYTE, s->len);
+    memset(s, 0, sizeof(*s));
+    g_bounceOutstanding -= 1;
+    /* The bump offset only ever grows while slots are live, so rewind it
+     * the moment the last slot is released; otherwise a long run of
+     * operations would exhaust the pool. */
+    if (g_bounceOutstanding == 0) {
+        g_bounceUsed = 0;
+    }
+}
+
+/* Translating client DMA callback: PRE maps into the pool, POST unmaps. */
+int whTestDma_BounceClientCb(struct whClientContext_t* client,
+                             uintptr_t clientAddr, void** xformedAddr,
+                             size_t len, whDmaOper oper, whDmaFlags flags)
+{
+    bounceSlot* s;
+    (void)client;
+    (void)flags;
+    /* Zero-length operations carry no data and are never dereferenced by the
+     * server; pass the address through untouched (no slot needed). */
+    if (len == 0) {
+        *xformedAddr = (void*)clientAddr;
+        return WH_ERROR_OK;
+    }
+
+    switch (oper) {
+        case WH_DMA_OPER_CLIENT_READ_PRE:
+            /* Server is about to read client memory: copy it into a pool slot
+             * and hand the server the pool address. */
+            s = _bounceAlloc(len);
+            if (s == NULL) {
+                return WH_ERROR_ABORTED;
+            }
+            memcpy((void*)s->base, (void*)clientAddr, len);
+            *xformedAddr = (void*)s->base;
+            break;
+
+        case WH_DMA_OPER_CLIENT_WRITE_PRE:
+            /* Server is about to write client memory: give it a pool slot to
+             * write into. */
+            s = _bounceAlloc(len);
+            if (s == NULL) {
+                return WH_ERROR_ABORTED;
+            }
+            *xformedAddr = (void*)s->base;
+            break;
+
+        case WH_DMA_OPER_CLIENT_READ_POST:
+            /* Release (and poison) the slot. A len>0 POST matching no live slot
+             * is a stray/double POST (a real port would free a bogus pointer);
+             * record it. */
+            s = _bounceFind((uintptr_t)*xformedAddr);
+            if (s != NULL) {
+                _bounceFree(s);
+            }
+            else {
+                g_bounceStrayPost++;
+            }
+            break;
+
+        case WH_DMA_OPER_CLIENT_WRITE_POST:
+            /* Server done writing: copy the result back, then release (and
+             * poison) the slot. A POST longer than its slot is stray too. */
+            s = _bounceFind((uintptr_t)*xformedAddr);
+            if (s != NULL && len <= s->len) {
+                memcpy((void*)clientAddr, (void*)s->base, len);
+                _bounceFree(s);
+            }
+            else {
+                g_bounceStrayPost++;
+            }
+            break;
+    }
+    return WH_ERROR_OK;
+}
+
+int whTestDma_BounceServerCb(struct whServerContext_t* server,
+                             uintptr_t clientAddr, void** serverPtr, size_t len,
+                             whDmaOper oper, whDmaFlags flags)
+{
+    const uintptr_t poolStart = (uintptr_t)g_bouncePool;
+    int             inPool;
+    (void)server;
+    (void)oper;
+    (void)flags;
+
+    /* Only the pool is reachable: a len>0 range that is not wholly inside it
+     * is a raw client pointer from a *Dma path that skipped translation. */
+    inPool = (clientAddr >= poolStart) &&
+             (clientAddr - poolStart <= sizeof(g_bouncePool)) &&
+             (len <= sizeof(g_bouncePool) - (clientAddr - poolStart));
+    if (len > 0 && !inPool) {
+        WH_ERROR_PRINT("wh_test bounce: server got untranslated address %p "
+                       "(len %u) outside the DMA pool\n",
+                       (void*)clientAddr, (unsigned)len);
+        return WH_ERROR_ACCESS;
+    }
+
+    /* Same process, so a pool address needs no further mapping. */
+    *serverPtr = (void*)clientAddr;
+    return WH_ERROR_OK;
+}
+
 static int whTest_DmaAllowListBasic(void)
 {
     int                rc;
diff --git a/test/wh_test_dma.h b/test/wh_test_dma.h
index c411dfe21..3ec0854d1 100644
--- a/test/wh_test_dma.h
+++ b/test/wh_test_dma.h
@@ -24,6 +24,48 @@
 #ifndef TEST_WH_TEST_DMA_H_
 #define TEST_WH_TEST_DMA_H_
 
+#include "wolfhsm/wh_settings.h"
+
 int whTest_Dma(void);
 
+#ifdef WOLFHSM_CFG_DMA
+#include <stdint.h>
+#include <stddef.h>
+#include "wolfhsm/wh_dma.h"
+
+struct whClientContext_t;
+struct whServerContext_t;
+
+/* Shared "bounce-pool" translating DMA callback harness (see wh_test_dma.c).
+ * Register whTestDma_BounceClientCb / whTestDma_BounceServerCb as the client /
+ * server DMA callbacks; the server callback rejects any address a *Dma path
+ * failed to translate. Single-client only (see wh_test_dma.c). */
+
+/* Reset the pool between independent test sequences. */
+void whTestDma_BounceReset(void);
+
+/* Translating client DMA callback (matches whClientDmaClientMemCb). */
+int whTestDma_BounceClientCb(struct whClientContext_t* client,
+                             uintptr_t clientAddr, void** xformedAddr,
+                             size_t len, whDmaOper oper, whDmaFlags flags);
+
+/* Validating server DMA callback (matches whServerDmaClientMemCb): identity
+ * maps in-pool addresses, rejects out-of-pool ones with WH_ERROR_ACCESS. */
+int whTestDma_BounceServerCb(struct whServerContext_t* server,
+                             uintptr_t clientAddr, void** serverPtr, size_t len,
+                             whDmaOper oper, whDmaFlags flags);
+
+/* Slots currently allocated (0 between operations); for leak assertions. */
+int whTestDma_BounceOutstanding(void);
+
+/* Count of len>0 POSTs that found no matching live slot (a stray/double POST).
+ * Should stay 0. */
+int whTestDma_BounceStrayPosts(void);
+
+/* Fault injection: allow this many further slot allocations, then fail (the
+ * client callback returns an error). Negative = unlimited (the default). Used
+ * to drive the *Dma Request leak-recovery paths. */
+void whTestDma_BounceSetAllocBudget(int allocs);
+#endif /* WOLFHSM_CFG_DMA */
+
 #endif /* TEST_WH_TEST_DMA_H_ */
diff --git a/wolfhsm/wh_client.h b/wolfhsm/wh_client.h
index 6be78cc9f..77e541504 100644
--- a/wolfhsm/wh_client.h
+++ b/wolfhsm/wh_client.h
@@ -147,14 +147,35 @@ typedef struct {
     uint64_t  inSz;
 } whClientDmaAsyncCmac;
 
-/* Async DMA context union. Only one DMA request can be in flight at a time
- * per client context, so a single union suffices. Each Response function
- * knows which member to access based on its own operation type. */
+/* One client buffer mapped across a DMA Request/Response boundary. The Request
+ * stashes the translated address, original client address, length, and POST
+ * direction; the Response runs wh_Client_DmaAsyncPost(). sz == 0 means nothing
+ * to clean up. postOper keeps the POST direction-correct when this shared union
+ * member is used by different ops; it does not make a mispaired Request/Response
+ * safe (the one-in-flight, self-paired invariant still applies). */
+typedef struct {
+    uintptr_t xformedAddr; /* translated address stashed by the Request */
+    uintptr_t clientAddr;  /* original client address */
+    uint64_t  sz;          /* mapped length; 0 = nothing to clean up */
+    whDmaOper postOper;    /* POST direction to run in the Response */
+} whClientDmaAsyncBuf;
+
+/* Two buffers mapped together for NvmAddObjectDma (metadata + optional data). */
+typedef struct {
+    whClientDmaAsyncBuf meta; /* metadata buffer mapping */
+    whClientDmaAsyncBuf data; /* optional data buffer mapping */
+} whClientDmaAsyncNvmAdd;
+
+/* Async DMA context union; only one DMA request is in flight at a time. The
+ * crypto members (sha/rng/cmac/aes) are bespoke and predate the generic holder.
+ * Key/NVM ops use `buf` (single-buffer) and `nvmAdd` (two-buffer). */
 typedef union {
-    whClientDmaAsyncSha  sha;
-    whClientDmaAsyncRng  rng;
-    whClientDmaAsyncAes  aes;
-    whClientDmaAsyncCmac cmac;
+    whClientDmaAsyncSha    sha;
+    whClientDmaAsyncRng    rng;
+    whClientDmaAsyncAes    aes;
+    whClientDmaAsyncCmac   cmac;
+    whClientDmaAsyncBuf    buf;
+    whClientDmaAsyncNvmAdd nvmAdd;
 } whClientDmaAsyncCtx;
 
 typedef struct {
@@ -3362,6 +3383,24 @@ int wh_Client_DmaProcessClientAddress(struct whClientContext_t* client,
                                       whDmaFlags flags);
 
 
+/**
+ * @brief Runs the POST half of a stashed DMA buffer mapping (INTERNAL).
+ *
+ * Shared between the client *Dma source files; not port-facing. Releases (and,
+ * for a server-write buffer, copies back) a mapping stashed by the matching
+ * Request, using buf->postOper for the direction. No-op when buf is NULL or
+ * buf->sz is 0; clears buf->sz (even on failure) so a later Response cannot
+ * re-run it.
+ *
+ * @param[in] client Pointer to the client context.
+ * @param[in,out] buf The stashed single-buffer mapping to clean up.
+ * @return WH_ERROR_OK, or the port POST callback's error (e.g. a failed unmap
+ *         or copy-back); WH_ERROR_OK when there is nothing to clean up.
+ */
+int wh_Client_DmaAsyncPost(struct whClientContext_t* client,
+                           whClientDmaAsyncBuf*       buf);
+
+
 /**
  * @brief Sends a DMA request and receives a response to verify an attribute
  * certificate.