From 2770833767442074524665c41d132d0fef4951d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Frauenschl=C3=A4ger?= <tobias@wolfssl.com>
Date: Fri, 5 Jun 2026 13:47:51 +0200
Subject: [PATCH 1/2] client DMA: fix KeyCacheDma use-after-free and translate
 key/NVM *Dma buffers

KeyExportDma, KeyExportPublicDma, NvmAddObjectDma and NvmReadDma put the raw
client pointer in the wire message instead of running the DMA translation
callback -- invisible on flat-memory ports (POSIX SHM) but rejected by a
split-address-space server. Each now runs the PRE translation before sending
and the matching POST on the Response.

KeyCacheDmaRequest also ran its POST inside the Request, before the server read
the buffer (a use-after-free on ports whose callback allocates); it now defers
the POST to KeyCacheDmaResponse.

The per-call-site async structs are consolidated into whClientDmaAsyncBuf with a
shared wh_Client_DmaAsyncPost() helper that returns the POST status (so a failed
unmap/copy-back is surfaced). Each Request clears its slot(s) before the PRE, so
an unpopulated slot (e.g. metadata-only NvmAddObjectDma) cannot POST stale union
memory; the NVM Requests forward data_hostaddr = 0 when there is no data buffer.
Requests fail fast with WH_ERROR_REQUEST_PENDING on a busy transport, and
wh_{Client,Server}_DmaRegisterAllowList() now accept NULL to unregister.
---
 src/wh_client.c     | 164 +++++++++++++++++++++++++++++++++++++-------
 src/wh_client_dma.c |  23 ++++++-
 src/wh_client_nvm.c | 131 +++++++++++++++++++++++++++++++----
 src/wh_server_dma.c |   4 +-
 wolfhsm/wh_client.h |  53 ++++++++++++--
 5 files changed, 331 insertions(+), 44 deletions(-)

diff --git a/src/wh_client.c b/src/wh_client.c
index 5c22f58ae..f577bb993 100644
--- a/src/wh_client.c
+++ b/src/wh_client.c
@@ -1430,30 +1430,48 @@ int wh_Client_KeyCacheDmaRequest(whClientContext* c, uint32_t flags,
                                  const void* keyAddr, uint16_t keySz,
                                  uint16_t keyId)
 {
-    int                                ret;
-    whMessageKeystore_CacheDmaRequest* req = NULL;
-    uintptr_t                          keyAddrPtr = 0;
-    uint16_t                           capSz      = 0;
+    int                                ret             = WH_ERROR_OK;
+    whMessageKeystore_CacheDmaRequest* req             = NULL;
+    uintptr_t                          keyAddrPtr      = 0;
+    uint16_t                           capSz           = 0;
+    int                                keyAddrAcquired = 0;
 
     if (c == NULL || (labelSz > 0 && label == NULL)) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req = (whMessageKeystore_CacheDmaRequest*)wh_CommClient_GetDataPtr(c->comm);
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
     memset(req, 0, sizeof(*req));
-    req->id      = keyId;
-    req->flags   = flags;
-    req->labelSz = 0;
-
-    /* Set up DMA buffer info */
+    req->id       = keyId;
+    req->flags    = flags;
+    req->labelSz  = 0;
     req->key.sz   = keySz;
-    ret           = wh_Client_DmaProcessClientAddress(
+    req->key.addr = 0;
+
+    /* Clear the slot up front so a failed PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the input key buffer. POST runs in the Response, not here:
+     * the server reads the buffer between request and response, so an
+     * in-request POST would free the scratch too early (use-after-free). */
+    ret = wh_Client_DmaProcessClientAddress(
         c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
         WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
-    req->key.addr = keyAddrPtr;
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    }
 
     /* Copy label if provided, truncate if necessary */
     if (labelSz > 0 && label != NULL) {
@@ -1467,9 +1485,10 @@ int wh_Client_KeyCacheDmaRequest(whClientContext* c, uint32_t flags,
                                     sizeof(*req), (uint8_t*)req);
     }
 
-    (void)wh_Client_DmaProcessClientAddress(
-        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
-        WH_DMA_OPER_CLIENT_READ_POST, (whDmaFlags){0});
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        /* SendRequest failed: the Response will not run, so POST now. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
     return ret;
 }
 
@@ -1492,6 +1511,9 @@ int wh_Client_KeyCacheDmaResponse(whClientContext* c, uint16_t* keyId)
     }
 
     ret = wh_Client_RecvResponse(c, &group, &action, &size, (uint8_t*)resp);
+    if (ret == WH_ERROR_NOTREADY) {
+        return ret;
+    }
 
     if (ret == 0) {
         /* Validate response */
@@ -1510,6 +1532,15 @@ int wh_Client_KeyCacheDmaResponse(whClientContext* c, uint16_t* keyId)
             }
         }
     }
+
+    /* POST cleanup: release the mapping once the server has read it. Surface a
+     * POST failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (ret == WH_ERROR_OK) {
+            ret = postRc;
+        }
+    }
     return ret;
 }
 
@@ -1531,23 +1562,56 @@ int wh_Client_KeyCacheDma(whClientContext* c, uint32_t flags, uint8_t* label,
 int wh_Client_KeyExportDmaRequest(whClientContext* c, uint16_t keyId,
                                   const void* keyAddr, uint16_t keySz)
 {
-    whMessageKeystore_ExportDmaRequest* req = NULL;
+    whMessageKeystore_ExportDmaRequest* req             = NULL;
+    uintptr_t                           keyAddrPtr      = 0;
+    int                                 ret             = WH_ERROR_OK;
+    int                                 keyAddrAcquired = 0;
 
     if (c == NULL || keyId == WH_KEYID_ERASED) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req =
         (whMessageKeystore_ExportDmaRequest*)wh_CommClient_GetDataPtr(c->comm);
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
+
     req->id       = keyId;
-    req->key.addr = (uint64_t)((uintptr_t)keyAddr);
+    req->key.addr = 0;
     req->key.sz   = keySz;
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY, WH_KEY_EXPORT_DMA,
-                                 sizeof(*req), (uint8_t*)req);
+    /* Clear the slot up front so a failed PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output key buffer; the server fills it and the
+     * Response POST copies the result back and releases it. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
+        WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+    }
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY, WH_KEY_EXPORT_DMA,
+                                    sizeof(*req), (uint8_t*)req);
+    }
+
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        /* SendRequest failed: the Response will not run, so POST now. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
@@ -1571,6 +1635,9 @@ int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size,
                                 (uint8_t*)resp);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_KEY) ||
@@ -1595,6 +1662,15 @@ int wh_Client_KeyExportDmaResponse(whClientContext* c, uint8_t* label,
             }
         }
     }
+
+    /* POST cleanup: copy results back and release the mapping; surface a POST
+     * failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
@@ -1616,11 +1692,18 @@ int wh_Client_KeyExportPublicDmaRequest(whClientContext* c, whKeyId keyId,
                                         uint16_t algo, void* keyAddr,
                                         uint16_t keySz)
 {
-    whMessageKeystore_ExportPublicDmaRequest* req = NULL;
+    whMessageKeystore_ExportPublicDmaRequest* req             = NULL;
+    uintptr_t                                 keyAddrPtr      = 0;
+    int                                       ret             = WH_ERROR_OK;
+    int                                       keyAddrAcquired = 0;
 
     if (c == NULL || keyId == WH_KEYID_ERASED) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
 
     req =
         (whMessageKeystore_ExportPublicDmaRequest*)wh_CommClient_GetDataPtr(
@@ -1628,14 +1711,38 @@ int wh_Client_KeyExportPublicDmaRequest(whClientContext* c, whKeyId keyId,
     if (req == NULL) {
         return WH_ERROR_BADARGS;
     }
+
     req->id       = keyId;
     req->algo     = algo;
-    req->key.addr = (uint64_t)((uintptr_t)keyAddr);
+    req->key.addr = 0;
     req->key.sz   = keySz;
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY,
-                                 WH_KEY_EXPORT_PUBLIC_DMA, sizeof(*req),
-                                 (uint8_t*)req);
+    /* Clear the slot up front so a failed PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output public key buffer; see KeyExportDmaRequest. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)keyAddr, (void**)&keyAddrPtr, keySz,
+        WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        keyAddrAcquired                 = 1;
+        req->key.addr                   = (uint64_t)keyAddrPtr;
+        c->dma.asyncCtx.buf.xformedAddr = keyAddrPtr;
+        c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)keyAddr;
+        c->dma.asyncCtx.buf.sz          = keySz;
+        c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+    }
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_KEY,
+                                    WH_KEY_EXPORT_PUBLIC_DMA, sizeof(*req),
+                                    (uint8_t*)req);
+    }
+
+    if (ret != WH_ERROR_OK && keyAddrAcquired) {
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
@@ -1660,6 +1767,9 @@ int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size,
                                 (uint8_t*)resp);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         if (resp_size != sizeof(*resp)) {
             rc = WH_ERROR_ABORTED;
@@ -1679,6 +1789,14 @@ int wh_Client_KeyExportPublicDmaResponse(whClientContext* c, uint8_t* label,
             }
         }
     }
+
+    /* POST cleanup; see KeyExportDmaResponse. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
diff --git a/src/wh_client_dma.c b/src/wh_client_dma.c
index 7dee4bef6..ecaa35331 100644
--- a/src/wh_client_dma.c
+++ b/src/wh_client_dma.c
@@ -36,10 +36,12 @@
 int wh_Client_DmaRegisterAllowList(whClientContext*          client,
                                    const whDmaAddrAllowList* allowlist)
 {
-    if (NULL == client || NULL == allowlist) {
+    if (NULL == client) {
         return WH_ERROR_BADARGS;
     }
 
+    /* A NULL allowlist clears any previously registered list (no enforcement),
+     * symmetric with wh_Client_DmaRegisterCb(NULL). */
     client->dma.dmaAddrAllowList = allowlist;
 
     return WH_ERROR_OK;
@@ -94,4 +96,23 @@ int wh_Client_DmaProcessClientAddress(whClientContext* client,
     }
     return rc;
 }
+
+int wh_Client_DmaAsyncPost(whClientContext* client, whClientDmaAsyncBuf* buf)
+{
+    int       rc;
+    uintptr_t addr;
+
+    if (client == NULL || buf == NULL || buf->sz == 0) {
+        return WH_ERROR_OK;
+    }
+
+    addr = buf->xformedAddr;
+    rc   = wh_Client_DmaProcessClientAddress(client, buf->clientAddr,
+                                             (void**)&addr, (size_t)buf->sz,
+                                             buf->postOper, (whDmaFlags){0});
+    /* Clear the slot even on failure so a later Response cannot re-run the
+     * POST; the failure is returned to the caller. */
+    buf->sz = 0;
+    return rc;
+}
 #endif /* WOLFHSM_CFG_DMA */
diff --git a/src/wh_client_nvm.c b/src/wh_client_nvm.c
index 5742b97c7..1327275c5 100644
--- a/src/wh_client_nvm.c
+++ b/src/wh_client_nvm.c
@@ -695,19 +695,67 @@ int wh_Client_NvmAddObjectDmaRequest(whClientContext* c,
                                      whNvmMetadata*   metadata,
                                      whNvmSize data_len, const uint8_t* data)
 {
-    whMessageNvm_AddObjectDmaRequest msg = {0};
+    whMessageNvm_AddObjectDmaRequest msg         = {0};
+    uintptr_t                        metaAddrPtr = 0;
+    uintptr_t                        dataAddrPtr = 0;
+    int                              ret         = WH_ERROR_OK;
 
     if (c == NULL) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
+
+    /* Clear both slots up front: a metadata-only object leaves the data slot
+     * unset, and the Response must not POST a stale (shared-union) size. */
+    c->dma.asyncCtx.nvmAdd.meta.sz = 0;
+    c->dma.asyncCtx.nvmAdd.data.sz = 0;
+
+    /* PRE-translate the metadata struct (fixed size) and the optional data
+     * buffer; the matching Response POST releases them. */
+    ret = wh_Client_DmaProcessClientAddress(
+        c, (uintptr_t)metadata, (void**)&metaAddrPtr, sizeof(whNvmMetadata),
+        WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
+    if (ret == WH_ERROR_OK) {
+        c->dma.asyncCtx.nvmAdd.meta.xformedAddr = metaAddrPtr;
+        c->dma.asyncCtx.nvmAdd.meta.clientAddr  = (uintptr_t)metadata;
+        c->dma.asyncCtx.nvmAdd.meta.sz          = sizeof(whNvmMetadata);
+        c->dma.asyncCtx.nvmAdd.meta.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    }
+
+    if (ret == WH_ERROR_OK && data != NULL && data_len > 0) {
+        ret = wh_Client_DmaProcessClientAddress(
+            c, (uintptr_t)data, (void**)&dataAddrPtr, data_len,
+            WH_DMA_OPER_CLIENT_READ_PRE, (whDmaFlags){0});
+        if (ret == WH_ERROR_OK) {
+            c->dma.asyncCtx.nvmAdd.data.xformedAddr = dataAddrPtr;
+            c->dma.asyncCtx.nvmAdd.data.clientAddr  = (uintptr_t)data;
+            c->dma.asyncCtx.nvmAdd.data.sz          = data_len;
+            c->dma.asyncCtx.nvmAdd.data.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+        }
+    }
 
-    msg.metadata_hostaddr = (uint64_t)(uintptr_t)metadata;
-    msg.data_hostaddr     = (uint64_t)(uintptr_t)data;
-    msg.data_len          = data_len;
+    msg.metadata_hostaddr = (uint64_t)metaAddrPtr;
+    /* 0 when there is no data buffer to DMA (dataAddrPtr is set only by the
+     * data PRE); never forward a raw, untranslated client pointer. */
+    msg.data_hostaddr = (uint64_t)dataAddrPtr;
+    msg.data_len      = data_len;
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
+                                    WH_MESSAGE_NVM_ACTION_ADDOBJECTDMA,
+                                    sizeof(msg), &msg);
+    }
 
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
-                                 WH_MESSAGE_NVM_ACTION_ADDOBJECTDMA,
-                                 sizeof(msg), &msg);
+    if (ret != WH_ERROR_OK) {
+        /* Send/PRE failed: release whatever was acquired (helper no-ops on the
+         * unset slot), in reverse order. */
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.data);
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.meta);
+    }
+    return ret;
 }
 
 int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
@@ -723,6 +771,9 @@ int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
     }
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size, &msg);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_NVM) ||
@@ -738,6 +789,16 @@ int wh_Client_NvmAddObjectDmaResponse(whClientContext* c, int32_t* out_rc)
             }
         }
     }
+
+    /* POST cleanup for both slots, reverse acquisition order; surface a POST
+     * failure if the operation otherwise succeeded. */
+    {
+        int postData = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.data);
+        int postMeta = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.nvmAdd.meta);
+        if (rc == WH_ERROR_OK) {
+            rc = (postData != WH_ERROR_OK) ? postData : postMeta;
+        }
+    }
     return rc;
 }
 
@@ -766,19 +827,53 @@ int wh_Client_NvmReadDmaRequest(whClientContext* c, whNvmId id,
                                 whNvmSize offset, whNvmSize data_len,
                                 uint8_t* data)
 {
-    whMessageNvm_ReadDmaRequest msg = {0};
+    whMessageNvm_ReadDmaRequest msg              = {0};
+    uintptr_t                   dataAddrPtr      = 0;
+    int                         ret              = WH_ERROR_OK;
+    int                         dataAddrAcquired = 0;
 
     if (c == NULL) {
         return WH_ERROR_BADARGS;
     }
+    /* Fail fast if busy: don't acquire a mapping a rejected send would leak. */
+    if (wh_CommClient_IsRequestPending(c->comm) == 1) {
+        return WH_ERROR_REQUEST_PENDING;
+    }
+
+    /* Clear the slot up front so a skipped PRE leaves nothing for POST. */
+    c->dma.asyncCtx.buf.sz = 0;
+
+    /* PRE-translate the output data buffer (only when there is one); the server
+     * writes the NVM contents and the Response POST copies them back. Skipping
+     * the empty case keeps a raw, untranslated pointer out of the message. */
+    if (data != NULL && data_len > 0) {
+        ret = wh_Client_DmaProcessClientAddress(
+            c, (uintptr_t)data, (void**)&dataAddrPtr, data_len,
+            WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0});
+        if (ret == WH_ERROR_OK) {
+            dataAddrAcquired                = 1;
+            c->dma.asyncCtx.buf.xformedAddr = dataAddrPtr;
+            c->dma.asyncCtx.buf.clientAddr  = (uintptr_t)data;
+            c->dma.asyncCtx.buf.sz          = data_len;
+            c->dma.asyncCtx.buf.postOper    = WH_DMA_OPER_CLIENT_WRITE_POST;
+        }
+    }
 
     msg.id            = id;
     msg.offset        = offset;
     msg.data_len      = data_len;
-    msg.data_hostaddr = (uint64_t)(uintptr_t)data;
-    return wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
-                                 WH_MESSAGE_NVM_ACTION_READDMA, sizeof(msg),
-                                 &msg);
+    msg.data_hostaddr = (uint64_t)dataAddrPtr;
+
+    if (ret == WH_ERROR_OK) {
+        ret = wh_Client_SendRequest(c, WH_MESSAGE_GROUP_NVM,
+                                    WH_MESSAGE_NVM_ACTION_READDMA, sizeof(msg),
+                                    &msg);
+    }
+
+    if (ret != WH_ERROR_OK && dataAddrAcquired) {
+        (void)wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+    }
+    return ret;
 }
 
 int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
@@ -794,6 +889,9 @@ int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
     }
 
     rc = wh_Client_RecvResponse(c, &resp_group, &resp_action, &resp_size, &msg);
+    if (rc == WH_ERROR_NOTREADY) {
+        return rc;
+    }
     if (rc == 0) {
         /* Validate response */
         if ((resp_group != WH_MESSAGE_GROUP_NVM) ||
@@ -809,6 +907,15 @@ int wh_Client_NvmReadDmaResponse(whClientContext* c, int32_t* out_rc)
             }
         }
     }
+
+    /* POST cleanup: copy the server's writes back and release the mapping;
+     * surface a POST failure if the operation otherwise succeeded. */
+    {
+        int postRc = wh_Client_DmaAsyncPost(c, &c->dma.asyncCtx.buf);
+        if (rc == WH_ERROR_OK) {
+            rc = postRc;
+        }
+    }
     return rc;
 }
 
diff --git a/src/wh_server_dma.c b/src/wh_server_dma.c
index 023aeaddf..21c78384b 100644
--- a/src/wh_server_dma.c
+++ b/src/wh_server_dma.c
@@ -77,10 +77,12 @@ int wh_Server_DmaRegisterMemCopyCb(whServerContext* server,
 int wh_Server_DmaRegisterAllowList(whServerContext*                server,
                                    const whServerDmaAddrAllowList* allowlist)
 {
-    if (NULL == server || NULL == allowlist) {
+    if (NULL == server) {
         return WH_ERROR_BADARGS;
     }
 
+    /* A NULL allowlist clears any previously registered list (no enforcement),
+     * symmetric with wh_Server_DmaRegisterCb(NULL). */
     server->dma.dmaAddrAllowList = allowlist;
 
     return WH_ERROR_OK;
diff --git a/wolfhsm/wh_client.h b/wolfhsm/wh_client.h
index 6be78cc9f..77e541504 100644
--- a/wolfhsm/wh_client.h
+++ b/wolfhsm/wh_client.h
@@ -147,14 +147,35 @@ typedef struct {
     uint64_t  inSz;
 } whClientDmaAsyncCmac;
 
-/* Async DMA context union. Only one DMA request can be in flight at a time
- * per client context, so a single union suffices. Each Response function
- * knows which member to access based on its own operation type. */
+/* One client buffer mapped across a DMA Request/Response boundary. The Request
+ * stashes the translated address, original client address, length, and POST
+ * direction; the Response runs wh_Client_DmaAsyncPost(). sz == 0 means nothing
+ * to clean up. postOper keeps the POST direction-correct when this shared union
+ * member is used by different ops; it does not make a mispaired Request/Response
+ * safe (the one-in-flight, self-paired invariant still applies). */
+typedef struct {
+    uintptr_t xformedAddr;
+    uintptr_t clientAddr;
+    uint64_t  sz;
+    whDmaOper postOper;
+} whClientDmaAsyncBuf;
+
+/* Two buffers mapped together for NvmAddObjectDma (metadata + optional data). */
+typedef struct {
+    whClientDmaAsyncBuf meta;
+    whClientDmaAsyncBuf data;
+} whClientDmaAsyncNvmAdd;
+
+/* Async DMA context union; only one DMA request is in flight at a time. The
+ * crypto members (sha/rng/cmac/aes) are bespoke and predate the generic holder.
+ * Key/NVM ops use `buf` (single-buffer) and `nvmAdd` (two-buffer). */
 typedef union {
-    whClientDmaAsyncSha  sha;
-    whClientDmaAsyncRng  rng;
-    whClientDmaAsyncAes  aes;
-    whClientDmaAsyncCmac cmac;
+    whClientDmaAsyncSha    sha;
+    whClientDmaAsyncRng    rng;
+    whClientDmaAsyncAes    aes;
+    whClientDmaAsyncCmac   cmac;
+    whClientDmaAsyncBuf    buf;
+    whClientDmaAsyncNvmAdd nvmAdd;
 } whClientDmaAsyncCtx;
 
 typedef struct {
@@ -3362,6 +3383,24 @@ int wh_Client_DmaProcessClientAddress(struct whClientContext_t* client,
                                       whDmaFlags flags);
 
 
+/**
+ * @brief Runs the POST half of a stashed DMA buffer mapping (INTERNAL).
+ *
+ * Shared between the client *Dma source files; not port-facing. Releases (and,
+ * for a server-write buffer, copies back) a mapping stashed by the matching
+ * Request, using buf->postOper for the direction. No-op when client or buf is
+ * NULL or buf->sz is 0; clears buf->sz (even on failure) so a later Response
+ * cannot re-run it.
+ *
+ * @param[in] client Pointer to the client context.
+ * @param[in,out] buf The stashed single-buffer mapping to clean up.
+ * @return WH_ERROR_OK, or the port POST callback's error (e.g. a failed unmap
+ *         or copy-back); WH_ERROR_OK when there is nothing to clean up.
+ */
+int wh_Client_DmaAsyncPost(struct whClientContext_t* client,
+                           whClientDmaAsyncBuf*       buf);
+
+
 /**
  * @brief Sends a DMA request and receives a response to verify an attribute
  * certificate.

From b611887becb9537e5f0825d9422eac8a213e2590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Frauenschl=C3=A4ger?= <tobias@wolfssl.com>
Date: Fri, 5 Jun 2026 13:47:51 +0200
Subject: [PATCH 2/2] test: exercise client *Dma APIs through a translating DMA
 callback

The existing POSIX tests never registered a non-identity DMA callback, so a
*Dma API that skipped translation looked identical to a correct one -- which is
why the keystore/NVM translation bugs survived CI. Add a shared "bounce-pool"
harness (test/wh_test_dma.c) modeling a split-address-space port: the client
callback bounces each buffer through a pool, the server callback rejects any
untranslated address, freed slots are poisoned so a premature POST
(use-after-free) corrupts the data, and a POST matching no live slot is recorded
as a stray/double free.

- wh_test_clientserver.c drives the keystore/NVM *Dma APIs (including
  metadata-only and injected PRE-failure cases) in the single-thread pump
  harness, where ordering makes the use-after-free deterministic.
- wh_test_crypto.c registers the same callback on the threaded harness so the
  whole crypto/cert *Dma suite runs through translation.
- _testDma now unregisters its stack-local allow list before returning (a
  pre-existing dangling pointer that ASAN flags once a later test uses DMA).

Runs under `make DMA=1`.
---
 test/wh_test_clientserver.c | 200 ++++++++++++++++++++++++++++++++
 test/wh_test_crypto.c       |  43 ++++++-
 test/wh_test_dma.c          | 224 ++++++++++++++++++++++++++++++++++++
 test/wh_test_dma.h          |  42 +++++++
 4 files changed, 507 insertions(+), 2 deletions(-)

diff --git a/test/wh_test_clientserver.c b/test/wh_test_clientserver.c
index de8e66561..baacc9f58 100644
--- a/test/wh_test_clientserver.c
+++ b/test/wh_test_clientserver.c
@@ -24,6 +24,7 @@
 
 #include "wh_test_common.h"
 #include "wh_test_clientserver.h"
+#include "wh_test_dma.h"
 #include "wolfhsm/wh_error.h"
 
 #include "wolfhsm/wh_comm.h"
@@ -406,8 +407,203 @@ static int _testDma(whServerContext* server, whClientContext* client)
                               sizeof(testMem.srvBufAllow),
                               (whServerDmaFlags){0}));
 
+    /* Unregister the allow list: it points at this function's stack-local
+     * testMem, which is about to go out of scope. */
+    WH_TEST_RETURN_ON_FAIL(wh_Server_DmaRegisterAllowList(server, NULL));
+
     return rc;
 }
+
+/*
+ * Drive the keystore/NVM *Dma client APIs end-to-end through the shared
+ * bounce-pool translating DMA callback (see test/wh_test_dma.c). The server can
+ * only touch the pool, so any API that forgets to translate is rejected; the
+ * single-thread pump makes the old KeyCacheDma use-after-free deterministic.
+ * On failure control jumps to cleanup so the callbacks are always unregistered.
+ */
+#define BOUNCE_TEST_NVM_ID 0x4242 /* arbitrary id, destroyed at end of test */
+
+/* Fail/assert helpers: set the caller's rc and goto its cleanup label. */
+#define BOUNCE_FAIL(expr)                          \
+    do {                                           \
+        if ((rc = (expr)) != WH_ERROR_OK) {        \
+            goto cleanup;                          \
+        }                                          \
+    } while (0)
+#define BOUNCE_ASSERT(cond)                        \
+    do {                                           \
+        if (!(cond)) {                             \
+            WH_ERROR_PRINT("bounce assert failed: %s (line %d)\n", #cond, \
+                           __LINE__);              \
+            rc = WH_ERROR_ABORTED;                 \
+            goto cleanup;                          \
+        }                                          \
+    } while (0)
+
+static int _testClientDmaBounce(whServerContext* server, whClientContext* client)
+{
+    int     rc        = WH_ERROR_OK;
+    int32_t server_rc = 0;
+
+    /* key material to cache + export back */
+    uint8_t  keyIn[32];
+    uint8_t  keyOut[32];
+    uint8_t  labelIn[WH_NVM_LABEL_LEN];
+    uint8_t  labelOut[WH_NVM_LABEL_LEN];
+    uint16_t keyIdIn  = (uint16_t)WH_KEYID_ERASED;
+    uint16_t keyIdOut = 0;
+    uint16_t keyOutSz = sizeof(keyOut);
+
+    /* NVM object to add (server reads) and read back (server writes) */
+    whNvmMetadata meta    = {0};
+    const char*   dataIn  = "bounce-pool-payload";
+    whNvmSize     dataLen = (whNvmSize)strlen(dataIn);
+    uint8_t       dataOut[64];
+
+    WH_TEST_PRINT(
+        "Testing client *Dma APIs through a translating DMA callback...\n");
+
+    whTestDma_BounceReset();
+    memset(keyIn, 0x5A, sizeof(keyIn));
+    memset(labelIn, 0, sizeof(labelIn));
+    (void)snprintf((char*)labelIn, sizeof(labelIn), "bounce-key");
+
+    meta.id     = BOUNCE_TEST_NVM_ID;
+    meta.access = WH_NVM_ACCESS_ANY;
+    meta.flags  = WH_NVM_FLAGS_NONE;
+    meta.len    = dataLen;
+    (void)snprintf((char*)meta.label, sizeof(meta.label), "bounce-obj");
+
+    /* From here on the server can only touch the bounce pool: the server
+     * callback rejects any address the client failed to translate. */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, whTestDma_BounceClientCb));
+    BOUNCE_FAIL(wh_Server_DmaRegisterCb(server, whTestDma_BounceServerCb));
+
+    /* --- NvmAddObjectDma: server READS metadata + data --- */
+    BOUNCE_FAIL(wh_Client_NvmAddObjectDmaRequest(client, &meta, dataLen,
+                                                 (const uint8_t*)dataIn));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmAddObjectDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- NvmReadDma: server WRITES the object's data back --- */
+    memset(dataOut, 0, sizeof(dataOut));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                            dataOut));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    BOUNCE_ASSERT(0 == memcmp(dataIn, dataOut, dataLen));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- NvmAddObjectDma metadata-only (data == NULL): the data slot is not
+     * populated here. Poison it first; a Request that fails to clear it makes
+     * the Response POST a stale mapping (a stray POST), caught below. --- */
+    client->dma.asyncCtx.nvmAdd.data.xformedAddr = (uintptr_t)0xBADC0DE;
+    client->dma.asyncCtx.nvmAdd.data.clientAddr  = (uintptr_t)0xBADC0DE;
+    client->dma.asyncCtx.nvmAdd.data.sz          = 1; /* would trigger a POST */
+    client->dma.asyncCtx.nvmAdd.data.postOper    = WH_DMA_OPER_CLIENT_READ_POST;
+    {
+        whNvmMetadata metaOnly = {0};
+        whNvmId       moId     = (whNvmId)(BOUNCE_TEST_NVM_ID + 1);
+
+        metaOnly.id     = moId;
+        metaOnly.access = WH_NVM_ACCESS_ANY;
+        metaOnly.flags  = WH_NVM_FLAGS_NONE;
+        metaOnly.len    = 0;
+        (void)snprintf((char*)metaOnly.label, sizeof(metaOnly.label),
+                       "bounce-meta");
+
+        BOUNCE_FAIL(
+            wh_Client_NvmAddObjectDmaRequest(client, &metaOnly, 0, NULL));
+        BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+        BOUNCE_FAIL(wh_Client_NvmAddObjectDmaResponse(client, &server_rc));
+        BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+        BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+        BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+        BOUNCE_FAIL(wh_Client_NvmDestroyObjectsRequest(client, 1, &moId));
+        BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+        BOUNCE_FAIL(wh_Client_NvmDestroyObjectsResponse(client, &server_rc));
+        BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+    }
+
+    /* --- Leak recovery on a PRE failure: a Request that fails after acquiring
+     * a mapping must release it. Inject an alloc failure and assert nothing is
+     * left outstanding. No request is sent (PRE fails first), so the comm stays
+     * idle for the cases below. --- */
+    whTestDma_BounceSetAllocBudget(0); /* first PRE fails: nothing acquired */
+    BOUNCE_ASSERT(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                              dataOut) != WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    whTestDma_BounceSetAllocBudget(1); /* meta PRE ok, data PRE fails */
+    BOUNCE_ASSERT(wh_Client_NvmAddObjectDmaRequest(
+                      client, &meta, dataLen, (const uint8_t*)dataIn) !=
+                  WH_ERROR_OK);
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0); /* meta slot released */
+    whTestDma_BounceSetAllocBudget(-1);                /* restore unlimited */
+    BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+    /* --- KeyCacheDma (server READS) then KeyExportDma (server WRITES): the
+     * use-after-free guard. If KeyCacheDma POSTs inside the Request (the old
+     * bug), the key slot is poisoned before the server reads it, the server
+     * caches poison, and the exported key mismatches keyIn below. --- */
+    BOUNCE_FAIL(wh_Client_KeyCacheDmaRequest(client, 0, labelIn, sizeof(labelIn),
+                                             keyIn, sizeof(keyIn), keyIdIn));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_KeyCacheDmaResponse(client, &keyIdOut));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    memset(keyOut, 0, sizeof(keyOut));
+    memset(labelOut, 0, sizeof(labelOut));
+    BOUNCE_FAIL(wh_Client_KeyExportDmaRequest(client, keyIdOut, keyOut,
+                                              sizeof(keyOut)));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_KeyExportDmaResponse(client, labelOut,
+                                               sizeof(labelOut), &keyOutSz));
+    BOUNCE_ASSERT(keyOutSz == sizeof(keyIn));
+    BOUNCE_ASSERT(0 == memcmp(keyIn, keyOut, sizeof(keyIn)));
+    BOUNCE_ASSERT(0 == memcmp(labelIn, labelOut, sizeof(labelIn)));
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+
+    /* --- Teeth check: with the client callback removed, the raw client pointer
+     * is outside the pool, so the server callback must reject it. --- */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, NULL));
+    memset(dataOut, 0, sizeof(dataOut));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaRequest(client, meta.id, 0, dataLen,
+                                            dataOut));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmReadDmaResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc != WH_ERROR_OK);
+
+    /* Remove the test object (normal-path teardown). */
+    BOUNCE_FAIL(wh_Client_DmaRegisterCb(client, whTestDma_BounceClientCb));
+    BOUNCE_FAIL(wh_Client_NvmDestroyObjectsRequest(client, 1, &meta.id));
+    BOUNCE_FAIL(wh_Server_HandleRequestMessage(server));
+    BOUNCE_FAIL(wh_Client_NvmDestroyObjectsResponse(client, &server_rc));
+    BOUNCE_ASSERT(server_rc == WH_ERROR_OK);
+
+    /* No mapping was leaked and no stray/double POST occurred across the run. */
+    BOUNCE_ASSERT(whTestDma_BounceOutstanding() == 0);
+    BOUNCE_ASSERT(whTestDma_BounceStrayPosts() == 0);
+
+    WH_TEST_PRINT("Client *Dma translating-callback tests PASSED\n");
+
+cleanup:
+    /* Always lift the alloc budget and unregister both callbacks (they accept
+     * NULL) so a failure cannot leak fault injection or pool-only enforcement
+     * into later tests. The test object is removed on the normal path only, to
+     * avoid more transport traffic over a possibly half-processed request. */
+    whTestDma_BounceSetAllocBudget(-1);
+    (void)wh_Client_DmaRegisterCb(client, NULL);
+    (void)wh_Server_DmaRegisterCb(server, NULL);
+    return rc;
+}
+
+#undef BOUNCE_FAIL
+#undef BOUNCE_ASSERT
 #endif /* WOLFHSM_CFG_DMA && WOLFHSM_CFG_ENABLE_CLIENT && \
           WOLFHSM_CFG_ENABLE_SERVER */
 
@@ -1199,6 +1395,10 @@ int whTest_ClientServerSequential(whTestNvmBackendType nvmType)
 #ifdef WOLFHSM_CFG_DMA
     /* Test DMA callbacks and address allowlisting */
     WH_TEST_RETURN_ON_FAIL(_testDma(server, client));
+
+    /* Drive the client *Dma APIs through a translating callback so a missing
+     * translation is caught on POSIX, not just on cross-domain hardware. */
+    WH_TEST_RETURN_ON_FAIL(_testClientDmaBounce(server, client));
 #endif /* WOLFHSM_CFG_DMA */
 
     /* Check that we are still connected */
diff --git a/test/wh_test_crypto.c b/test/wh_test_crypto.c
index da9434743..bdede4dd3 100644
--- a/test/wh_test_crypto.c
+++ b/test/wh_test_crypto.c
@@ -63,6 +63,7 @@
 #include "wolfhsm/wh_crypto.h"
 
 #include "wh_test_common.h"
+#include "wh_test_dma.h"
 
 #if defined(WOLFHSM_CFG_TEST_POSIX)
 #include <unistd.h> /* For sleep */
@@ -14438,7 +14439,13 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     }};
 
 #ifdef WOLFHSM_CFG_DMA
-    whClientDmaConfig clientDmaConfig = {0};
+    /* Run every crypto/cert *Dma op through the bounce-pool callback so a
+     * missing translation is rejected (see test/wh_test_dma.c). This threaded
+     * harness only catches missing translation; the use-after-free class is
+     * covered by the single-thread harness. */
+    whClientDmaConfig clientDmaConfig = {
+        .cb = whTestDma_BounceClientCb,
+    };
 #endif
     whClientConfig c_conf[1] = {{
         .comm = cc_conf,
@@ -14479,16 +14486,30 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     /* Crypto context */
     whServerCryptoContext crypto[1] = {0};
 
+#ifdef WOLFHSM_CFG_DMA
+    /* Server may only touch the bounce pool; this callback rejects an
+     * untranslated client pointer. */
+    whServerDmaConfig serverDmaConfig = {
+        .cb = whTestDma_BounceServerCb,
+    };
+#endif
 
     whServerConfig s_conf[1] = {{
         .comm_config = cs_conf,
         .nvm         = nvm,
         .crypto      = crypto,
         .devId       = INVALID_DEVID,
+#ifdef WOLFHSM_CFG_DMA
+        .dmaConfig   = &serverDmaConfig,
+#endif
     }};
 
     WH_TEST_RETURN_ON_FAIL(wh_Nvm_Init(nvm, n_conf));
 
+#ifdef WOLFHSM_CFG_DMA
+    whTestDma_BounceReset();
+#endif
+
     ret = wolfCrypt_Init();
     if (ret == 0) {
         ret = wc_InitRng_ex(crypto->rng, NULL, INVALID_DEVID);
@@ -14497,6 +14518,22 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
         }
         else {
             _whClientServerThreadTest(c_conf, s_conf);
+#ifdef WOLFHSM_CFG_DMA
+            /* After the client thread joins, no mapping may be outstanding and
+             * no POST may have hit a stale/unknown slot. */
+            if (whTestDma_BounceOutstanding() != 0) {
+                WH_ERROR_PRINT("wh_test bounce: %d DMA mapping(s) leaked "
+                               "across the crypto suite\n",
+                               whTestDma_BounceOutstanding());
+                ret = WH_ERROR_ABORTED;
+            }
+            if (whTestDma_BounceStrayPosts() != 0) {
+                WH_ERROR_PRINT("wh_test bounce: %d stray/double DMA POST(s) "
+                               "across the crypto suite\n",
+                               whTestDma_BounceStrayPosts());
+                ret = WH_ERROR_ABORTED;
+            }
+#endif
         }
     }
     else {
@@ -14507,7 +14544,9 @@ static int wh_ClientServer_MemThreadTest(whTestNvmBackendType nvmType)
     wc_FreeRng(crypto->rng);
     wolfCrypt_Cleanup();
 
-    return WH_ERROR_OK;
+    /* Propagate ret so an init failure or a failed DMA bounce leak check fails
+     * the test instead of silently passing. */
+    return ret;
 }
 #endif /* WOLFHSM_CFG_TEST_POSIX */
 
diff --git a/test/wh_test_dma.c b/test/wh_test_dma.c
index d57e0bccd..88bd3db46 100644
--- a/test/wh_test_dma.c
+++ b/test/wh_test_dma.c
@@ -37,6 +37,230 @@
 #include "wh_test_common.h"
 #include "wh_test_dma.h"
 
+/*
+ * Shared "bounce-pool" translating DMA callback harness.
+ *
+ * Models a split-address-space port: the server can only reach a dedicated pool,
+ * not arbitrary client RAM. The client callback bounces each buffer through a
+ * pool slot and hands the server the pool address; the server callback rejects
+ * (WH_ERROR_ACCESS) any address outside the pool, so a *Dma API that forgot to
+ * translate is caught. Freed slots are poisoned, so a premature POST
+ * (use-after-free) corrupts the data, and a POST matching no live slot is
+ * counted as a stray/double POST.
+ *
+ * Missing translation is caught in both harnesses; the use-after-free class is
+ * deterministic only in the single-thread pump harness, where the client POST
+ * is ordered before the server read.
+ *
+ * Single-client only: the allocator is mutated only by the (serialized) client
+ * side; the server just reads/writes pool bytes, with happens-before provided
+ * by the request/response round-trip through the transport.
+ */
+struct whClientContext_t; /* opaque: callbacks only use the pointer */
+struct whServerContext_t; /* opaque: callbacks only use the pointer */
+
+/* Generous headroom for the few buffers one op maps concurrently (the largest
+ * being an ML-DSA-sized key); the pool recycles between ops. */
+#define BOUNCE_POOL_SIZE \
+    ((64 * 1024) + (8 * WOLFHSM_CFG_SERVER_KEYCACHE_BIG_BUFSIZE))
+#define BOUNCE_POOL_SLOTS 64
+#define BOUNCE_POISON_BYTE ((uint8_t)0xEF)
+
+typedef struct {
+    int       inUse; /* nonzero while the slot tracks a live mapping */
+    uintptr_t base;  /* address within g_bouncePool */
+    size_t    len;   /* length requested by the caller, before alignment */
+} bounceSlot;
+
+static uint8_t    g_bouncePool[BOUNCE_POOL_SIZE];
+static bounceSlot g_bounceSlots[BOUNCE_POOL_SLOTS];
+static size_t     g_bounceUsed;        /* bump offset into the pool */
+static int        g_bounceOutstanding; /* slots currently allocated */
+static int        g_bounceStrayPost;   /* len>0 POSTs with no matching slot */
+static int        g_bounceAllocBudget; /* allocs still allowed; <0 = unlimited */
+
+void whTestDma_BounceReset(void)
+{
+    g_bounceAllocBudget = -1; /* negative: no injected allocation failures */
+    g_bounceStrayPost   = 0;
+    g_bounceOutstanding = 0;
+    g_bounceUsed        = 0;
+    memset(g_bounceSlots, 0, sizeof(g_bounceSlots)); /* every slot free */
+    memset(g_bouncePool, BOUNCE_POISON_BYTE, sizeof(g_bouncePool));
+}
+
+int whTestDma_BounceOutstanding(void)
+{
+    return g_bounceOutstanding; /* slots allocated and not yet freed */
+}
+
+int whTestDma_BounceStrayPosts(void)
+{
+    return g_bounceStrayPost; /* len>0 POSTs that matched no live slot */
+}
+
+void whTestDma_BounceSetAllocBudget(int allocs)
+{
+    g_bounceAllocBudget = allocs; /* 0 fails the next alloc; <0 = unlimited */
+}
+
+/* Bump-allocate len pool bytes plus a tracking slot; NULL on failure. */
+static bounceSlot* _bounceAlloc(size_t len)
+{
+    int    i;
+    size_t aligned = (len + 7u) & ~(size_t)7u; /* 8-byte align slices */
+
+    /* Injected failure for exercising leak-recovery paths; no diagnostic. */
+    if (g_bounceAllocBudget == 0) {
+        return NULL;
+    }
+
+    if (aligned < len || aligned > sizeof(g_bouncePool) - g_bounceUsed) {
+        /* `aligned < len` catches round-up wraparound. With recycle-on-empty,
+         * exhaustion usually means a leaked mapping (PRE without POST). */
+        WH_ERROR_PRINT("wh_test bounce: pool exhausted (used %u + %u > %u, "
+                       "%d outstanding); likely a leaked DMA mapping\n",
+                       (unsigned)g_bounceUsed, (unsigned)aligned,
+                       (unsigned)sizeof(g_bouncePool), g_bounceOutstanding);
+        return NULL;
+    }
+    for (i = 0; i < BOUNCE_POOL_SLOTS; i++) {
+        if (!g_bounceSlots[i].inUse) {
+            g_bounceSlots[i].inUse = 1;
+            g_bounceSlots[i].base  = (uintptr_t)&g_bouncePool[g_bounceUsed];
+            g_bounceSlots[i].len   = len;
+            g_bounceUsed += aligned;
+            g_bounceOutstanding++;
+            if (g_bounceAllocBudget > 0) {
+                g_bounceAllocBudget--;
+            }
+            return &g_bounceSlots[i];
+        }
+    }
+    WH_ERROR_PRINT("wh_test bounce: out of slots (%d); raise BOUNCE_POOL_SLOTS "
+                   "or check for a leaked mapping\n", BOUNCE_POOL_SLOTS);
+    return NULL;
+}
+
+static bounceSlot* _bounceFind(uintptr_t base)
+{
+    bounceSlot* s; /* linear scan; only in-use slots may match */
+    for (s = g_bounceSlots; s < &g_bounceSlots[BOUNCE_POOL_SLOTS]; s++) {
+        if (s->inUse && s->base == base) {
+            return s;
+        }
+    }
+    return NULL;
+}
+
+static void _bounceFree(bounceSlot* s)
+{
+    /* Scrub the slice first so that any later read through a stale (post-POST)
+     * address sees poison rather than plausible data. */
+    memset((void*)s->base, BOUNCE_POISON_BYTE, s->len);
+    *s = (bounceSlot){0}; /* inUse, base and len all back to zero */
+
+    /* Rewind the bump offset once the last live slot is gone; otherwise a long
+     * run of operations would exhaust the pool. */
+    g_bounceOutstanding--;
+    if (g_bounceOutstanding == 0) {
+        g_bounceUsed = 0;
+    }
+}
+
+/* Client DMA callback: bounce every non-empty buffer through the pool.
+ *
+ * PRE allocates a slot (READ_PRE also copies the client bytes into it) and
+ * returns the pool address in *xformedAddr. POST looks the slot up by
+ * *xformedAddr, copies the result back to clientAddr for WRITE_POST, and frees
+ * it; a POST that matches no live slot is only counted. len == 0 passes the
+ * address through untouched.
+ *
+ * Returns WH_ERROR_OK, or WH_ERROR_ABORTED when no slot can be allocated or
+ * oper is not one of the operations handled here. */
+int whTestDma_BounceClientCb(struct whClientContext_t* client,
+                             uintptr_t clientAddr, void** xformedAddr,
+                             size_t len, whDmaOper oper, whDmaFlags flags)
+{
+    bounceSlot* s;
+    (void)client;
+    (void)flags;
+
+    /* Zero-length operations carry no data and are never dereferenced by the
+     * server; pass the address through untouched (no slot needed). */
+    if (len == 0) {
+        *xformedAddr = (void*)clientAddr;
+        return WH_ERROR_OK;
+    }
+
+    switch (oper) {
+        case WH_DMA_OPER_CLIENT_READ_PRE:
+        case WH_DMA_OPER_CLIENT_WRITE_PRE:
+            /* Either way the server gets a pool slot in place of the client
+             * buffer. */
+            s = _bounceAlloc(len);
+            if (s == NULL) {
+                return WH_ERROR_ABORTED;
+            }
+            if (oper == WH_DMA_OPER_CLIENT_READ_PRE) {
+                /* Server is about to read: stage the client bytes. */
+                memcpy((void*)s->base, (void*)clientAddr, len);
+            }
+            *xformedAddr = (void*)s->base;
+            break;
+
+        case WH_DMA_OPER_CLIENT_READ_POST:
+        case WH_DMA_OPER_CLIENT_WRITE_POST:
+            /* A len>0 POST matching no live slot is a stray/double POST (a
+             * real port would free a bogus pointer); record it. */
+            s = _bounceFind((uintptr_t)*xformedAddr);
+            if (s == NULL) {
+                g_bounceStrayPost++;
+                break;
+            }
+            if (oper == WH_DMA_OPER_CLIENT_WRITE_POST) {
+                /* Server done writing: copy the result back to the client. */
+                memcpy((void*)clientAddr, (void*)s->base, len);
+            }
+            /* Release (and poison) the slot. */
+            _bounceFree(s);
+            break;
+
+        default:
+            /* Unknown operation: fail loudly rather than report success with
+             * *xformedAddr left unset. */
+            return WH_ERROR_ABORTED;
+    }
+    return WH_ERROR_OK;
+}
+
+int whTestDma_BounceServerCb(struct whServerContext_t* server,
+                             uintptr_t clientAddr, void** serverPtr, size_t len,
+                             whDmaOper oper, whDmaFlags flags)
+{
+    const uintptr_t poolStart = (uintptr_t)g_bouncePool;
+    const size_t    poolSize  = sizeof(g_bouncePool);
+    (void)server;
+    (void)oper;
+    (void)flags;
+
+    /* A non-empty range must lie wholly inside the pool; anything else is a
+     * raw client pointer from a *Dma path that skipped translation. The
+     * offset is only formed once clientAddr >= poolStart, so no wraparound. */
+    if (len > 0 &&
+        !(clientAddr >= poolStart && clientAddr - poolStart <= poolSize &&
+          len <= poolSize - (clientAddr - poolStart))) {
+        WH_ERROR_PRINT("wh_test bounce: server got untranslated address %p "
+                       "(len %u) outside the DMA pool\n",
+                       (void*)clientAddr, (unsigned)len);
+        return WH_ERROR_ACCESS;
+    }
+
+    /* In-pool addresses are handed back as-is (same-process identity map). */
+    *serverPtr = (void*)clientAddr;
+    return WH_ERROR_OK;
+}
+
 static int whTest_DmaAllowListBasic(void)
 {
     int                rc;
diff --git a/test/wh_test_dma.h b/test/wh_test_dma.h
index c411dfe21..3ec0854d1 100644
--- a/test/wh_test_dma.h
+++ b/test/wh_test_dma.h
@@ -24,6 +24,48 @@
 #ifndef TEST_WH_TEST_DMA_H_
 #define TEST_WH_TEST_DMA_H_
 
+#include "wolfhsm/wh_settings.h"
+
 int whTest_Dma(void);
 
+#ifdef WOLFHSM_CFG_DMA
+#include <stdint.h>
+#include <stddef.h>
+#include "wolfhsm/wh_dma.h"
+
+struct whClientContext_t;
+struct whServerContext_t;
+
+/* Shared "bounce-pool" translating DMA callback harness (see wh_test_dma.c).
+ * Register whTestDma_BounceClientCb / whTestDma_BounceServerCb as the client /
+ * server DMA callbacks; the server callback rejects any address a *Dma path
+ * failed to translate. Single-client only (see wh_test_dma.c). */
+
+/* Reset the pool between independent test sequences. */
+void whTestDma_BounceReset(void);
+
+/* Translating client DMA callback (matches whClientDmaClientMemCb). */
+int whTestDma_BounceClientCb(struct whClientContext_t* client,
+                             uintptr_t clientAddr, void** xformedAddr,
+                             size_t len, whDmaOper oper, whDmaFlags flags);
+
+/* Validating server DMA callback (matches whServerDmaClientMemCb): identity
+ * maps in-pool addresses, rejects out-of-pool ones with WH_ERROR_ACCESS. */
+int whTestDma_BounceServerCb(struct whServerContext_t* server,
+                             uintptr_t clientAddr, void** serverPtr, size_t len,
+                             whDmaOper oper, whDmaFlags flags);
+
+/* Slots currently allocated (0 between operations); for leak assertions. */
+int whTestDma_BounceOutstanding(void);
+
+/* Count of len>0 POSTs that found no matching live slot (a stray/double POST).
+ * Should stay 0. */
+int whTestDma_BounceStrayPosts(void);
+
+/* Fault injection: allow this many further slot allocations, then fail (the
+ * client callback returns an error). Negative = unlimited (the default). Used
+ * to drive the *Dma Request leak-recovery paths. */
+void whTestDma_BounceSetAllocBudget(int allocs);
+#endif /* WOLFHSM_CFG_DMA */
+
 #endif /* TEST_WH_TEST_DMA_H_ */