Skip to content

Commit b022f59

Browse files
Fenghua Yu authored and vinodkoul committed
dmaengine: idxd: add idxd_copy_cr() to copy user completion record during page fault handling
Define idxd_copy_cr() to copy a completion record to the fault address in the user address space that is found by work queue (wq) and PASID. It will be used to write the user's completion record that the hardware device is not able to write due to a user completion record page fault. An xarray is added to associate the PASID and mm with the struct idxd_user_context so the mm can be found by PASID and wq. It is called when handling the completion record fault in a kernel thread context. Switch to the mm using kthread_use_mm() and copy the completion record to the mm via copy_to_user(). Once the copy is completed, switch back to the current mm using kthread_unuse_mm(). Suggested-by: Christoph Hellwig <hch@infradead.org> Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Suggested-by: Tony Luck <tony.luck@intel.com> Tested-by: Tony Zhu <tony.zhu@intel.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/20230407203143.2189681-9-fenghua.yu@intel.com Signed-off-by: Vinod Koul <vkoul@kernel.org>
1 parent c2f156b commit b022f59

4 files changed

Lines changed: 111 additions & 5 deletions

File tree

drivers/dma/idxd/cdev.c

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
#include <linux/fs.h>
1212
#include <linux/poll.h>
1313
#include <linux/iommu.h>
14+
#include <linux/highmem.h>
1415
#include <uapi/linux/idxd.h>
16+
#include <linux/xarray.h>
1517
#include "registers.h"
1618
#include "idxd.h"
1719

@@ -34,6 +36,7 @@ struct idxd_user_context {
3436
struct idxd_wq *wq;
3537
struct task_struct *task;
3638
unsigned int pasid;
39+
struct mm_struct *mm;
3740
unsigned int flags;
3841
struct iommu_sva *sva;
3942
};
@@ -68,6 +71,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode)
6871
return idxd_cdev->wq;
6972
}
7073

74+
static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
75+
{
76+
struct idxd_wq *wq = ctx->wq;
77+
void *ptr;
78+
79+
mutex_lock(&wq->uc_lock);
80+
ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
81+
if (ptr != (void *)ctx)
82+
dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n",
83+
ctx->pasid);
84+
mutex_unlock(&wq->uc_lock);
85+
}
86+
7187
static int idxd_cdev_open(struct inode *inode, struct file *filp)
7288
{
7389
struct idxd_user_context *ctx;
@@ -108,20 +124,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
108124

109125
pasid = iommu_sva_get_pasid(sva);
110126
if (pasid == IOMMU_PASID_INVALID) {
111-
iommu_sva_unbind_device(sva);
112127
rc = -EINVAL;
113-
goto failed;
128+
goto failed_get_pasid;
114129
}
115130

116131
ctx->sva = sva;
117132
ctx->pasid = pasid;
133+
ctx->mm = current->mm;
134+
135+
mutex_lock(&wq->uc_lock);
136+
rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
137+
mutex_unlock(&wq->uc_lock);
138+
if (rc < 0)
139+
dev_warn(dev, "PASID entry already exist in xarray.\n");
118140

119141
if (wq_dedicated(wq)) {
120142
rc = idxd_wq_set_pasid(wq, pasid);
121143
if (rc < 0) {
122144
iommu_sva_unbind_device(sva);
123145
dev_err(dev, "wq set pasid failed: %d\n", rc);
124-
goto failed;
146+
goto failed_set_pasid;
125147
}
126148
}
127149
}
@@ -130,7 +152,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
130152
mutex_unlock(&wq->wq_lock);
131153
return 0;
132154

133-
failed:
155+
failed_set_pasid:
156+
if (device_user_pasid_enabled(idxd))
157+
idxd_xa_pasid_remove(ctx);
158+
failed_get_pasid:
159+
if (device_user_pasid_enabled(idxd))
160+
iommu_sva_unbind_device(sva);
161+
failed:
134162
mutex_unlock(&wq->wq_lock);
135163
kfree(ctx);
136164
return rc;
@@ -161,8 +189,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
161189
}
162190
}
163191

164-
if (ctx->sva)
192+
if (ctx->sva) {
165193
iommu_sva_unbind_device(ctx->sva);
194+
idxd_xa_pasid_remove(ctx);
195+
}
166196
kfree(ctx);
167197
mutex_lock(&wq->wq_lock);
168198
idxd_wq_put(wq);
@@ -418,3 +448,70 @@ void idxd_cdev_remove(void)
418448
ida_destroy(&ictx[i].minor_ida);
419449
}
420450
}
451+
452+
/**
453+
* idxd_copy_cr - copy completion record to user address space found by wq and
454+
* PASID
455+
* @wq: work queue
456+
* @pasid: PASID
457+
* @addr: user fault address to write
458+
* @cr: completion record
459+
* @len: number of bytes to copy
460+
*
461+
* This is called by a work that handles completion record fault.
462+
*
463+
* Return: number of bytes copied.
464+
*/
465+
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
466+
void *cr, int len)
467+
{
468+
struct device *dev = &wq->idxd->pdev->dev;
469+
int left = len, status_size = 1;
470+
struct idxd_user_context *ctx;
471+
struct mm_struct *mm;
472+
473+
mutex_lock(&wq->uc_lock);
474+
475+
ctx = xa_load(&wq->upasid_xa, pasid);
476+
if (!ctx) {
477+
dev_warn(dev, "No user context\n");
478+
goto out;
479+
}
480+
481+
mm = ctx->mm;
482+
/*
483+
* The completion record fault handling work is running in kernel
484+
* thread context. It temporarily switches to the mm to copy cr
485+
* to addr in the mm.
486+
*/
487+
kthread_use_mm(mm);
488+
left = copy_to_user((void __user *)addr + status_size, cr + status_size,
489+
len - status_size);
490+
/*
491+
* Copy status only after the rest of completion record is copied
492+
* successfully so that the user gets the complete completion record
493+
* when a non-zero status is polled.
494+
*/
495+
if (!left) {
496+
u8 status;
497+
498+
/*
499+
* Ensure that the completion record's status field is written
500+
* after the rest of the completion record has been written.
501+
* This ensures that the user receives the correct completion
502+
* record information once polling for a non-zero status.
503+
*/
504+
wmb();
505+
status = *(u8 *)cr;
506+
if (put_user(status, (u8 __user *)addr))
507+
left += status_size;
508+
} else {
509+
left += status_size;
510+
}
511+
kthread_unuse_mm(mm);
512+
513+
out:
514+
mutex_unlock(&wq->uc_lock);
515+
516+
return len - left;
517+
}

drivers/dma/idxd/idxd.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ struct idxd_wq {
215215
char name[WQ_NAME_SIZE + 1];
216216
u64 max_xfer_bytes;
217217
u32 max_batch_size;
218+
219+
/* Lock to protect upasid_xa access. */
220+
struct mutex uc_lock;
221+
struct xarray upasid_xa;
218222
};
219223

220224
struct idxd_engine {
@@ -702,6 +706,8 @@ void idxd_cdev_remove(void);
702706
int idxd_cdev_get_major(struct idxd_device *idxd);
703707
int idxd_wq_add_cdev(struct idxd_wq *wq);
704708
void idxd_wq_del_cdev(struct idxd_wq *wq);
709+
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
710+
void *buf, int len);
705711

706712
/* perfmon */
707713
#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)

drivers/dma/idxd/init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
199199
}
200200
bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
201201
}
202+
mutex_init(&wq->uc_lock);
203+
xa_init(&wq->upasid_xa);
202204
idxd->wqs[i] = wq;
203205
}
204206

drivers/dma/idxd/sysfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,6 +1292,7 @@ static void idxd_conf_wq_release(struct device *dev)
12921292

12931293
bitmap_free(wq->opcap_bmap);
12941294
kfree(wq->wqcfg);
1295+
xa_destroy(&wq->upasid_xa);
12951296
kfree(wq);
12961297
}
12971298

0 commit comments

Comments
 (0)