Commit f558630
drm/xe/migrate: skip bounce buffer path on xe2
Now that we support MEM_COPY we should be able to use the PAGE_COPY mode, otherwise falling back to BYTE_COPY mode when we have odd sizing/alignment.

v2:
  - Use info.has_mem_copy_instr
  - Rebase on latest changes.
v3 (Matt Brost):
  - Allow various pitches, including 1-byte pitch, for MEM_COPY

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20251022163836.191405-8-matthew.auld@intel.com
1 parent: 1e12dba

1 file changed: drivers/gpu/drm/xe/xe_migrate.c (32 additions & 11 deletions)
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1920,6 +1920,25 @@ enum xe_migrate_copy_dir {
 #define XE_CACHELINE_BYTES 64ull
 #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1)
 
+static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len)
+{
+        u32 pitch;
+
+        if (IS_ALIGNED(len, PAGE_SIZE))
+                pitch = PAGE_SIZE;
+        else if (IS_ALIGNED(len, SZ_4K))
+                pitch = SZ_4K;
+        else if (IS_ALIGNED(len, SZ_256))
+                pitch = SZ_256;
+        else if (IS_ALIGNED(len, 4))
+                pitch = 4;
+        else
+                pitch = 1;
+
+        xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr);
+        return pitch;
+}
+
 static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
                                          unsigned long len,
                                          unsigned long sram_offset,
@@ -1937,14 +1956,14 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
         struct xe_bb *bb;
         u32 update_idx, pt_slot = 0;
         unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);
-        unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
-                PAGE_SIZE : 4;
+        unsigned int pitch = xe_migrate_copy_pitch(xe, len);
         int err;
         unsigned long i, j;
         bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset);
 
-        if (drm_WARN_ON(&xe->drm, (!IS_ALIGNED(len, pitch)) ||
-                        (sram_offset | vram_addr) & XE_CACHELINE_MASK))
+        if (!xe->info.has_mem_copy_instr &&
+            drm_WARN_ON(&xe->drm,
+                        (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK))
                 return ERR_PTR(-EOPNOTSUPP);
 
         xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
@@ -2163,9 +2182,10 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
         xe_bo_assert_held(bo);
 
         /* Use bounce buffer for small access and unaligned access */
-        if (!IS_ALIGNED(len, 4) ||
-            !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) ||
-            !IS_ALIGNED(offset, XE_CACHELINE_BYTES)) {
+        if (!xe->info.has_mem_copy_instr &&
+            (!IS_ALIGNED(len, 4) ||
+             !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) ||
+             !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) {
                 int buf_offset = 0;
                 void *bounce;
                 int err;
@@ -2227,20 +2247,21 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
                 u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
                         cursor.start;
                 int current_bytes;
+                u32 pitch;
 
                 if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER)
                         current_bytes = min_t(int, bytes_left,
                                               MAX_PREEMPTDISABLE_TRANSFER);
                 else
                         current_bytes = min_t(int, bytes_left, cursor.size);
 
-                if (current_bytes & ~PAGE_MASK) {
-                        int pitch = 4;
-
+                pitch = xe_migrate_copy_pitch(xe, current_bytes);
+                if (xe->info.has_mem_copy_instr)
+                        current_bytes = min_t(int, current_bytes, U16_MAX * pitch);
+                else
                         current_bytes = min_t(int, current_bytes,
                                               round_down(S16_MAX * pitch,
                                                          XE_CACHELINE_BYTES));
-                }
 
                 __fence = xe_migrate_vram(m, current_bytes,
                                           (unsigned long)buf & ~PAGE_MASK,