@@ -669,6 +669,7 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 	u32 mocs = 0;
 	u32 tile_y = 0;
 
+	xe_gt_assert(gt, !(pitch & 3));
 	xe_gt_assert(gt, size / pitch <= S16_MAX);
 	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
 	xe_gt_assert(gt, pitch <= U16_MAX);
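The new assertion backstops the dword-pitch requirement that the existing `pitch / 4 <= S16_MAX` check already hints at: the copy emitted here works in dword units of pitch, and xe_migrate_vram() below now passes a pitch of either PAGE_SIZE or 4. A minimal sketch of the failure the assert rules out (values invented for illustration):

	unsigned int pitch = 6;			/* not a multiple of 4 */
	unsigned int pitch_dw = pitch / 4;	/* truncates to 1: row stride would be wrong */
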
@@ -1546,7 +1547,6 @@ void xe_migrate_wait(struct xe_migrate *m)
 	dma_fence_wait(m->fence, false);
 }
 
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
 static u32 pte_update_cmd_size(u64 size)
 {
 	u32 num_dword;
@@ -1604,8 +1604,12 @@ enum xe_migrate_copy_dir {
 	XE_MIGRATE_COPY_TO_SRAM,
 };
 
+#define XE_CACHELINE_BYTES	64ull
+#define XE_CACHELINE_MASK	(XE_CACHELINE_BYTES - 1)
+
 static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
-					 unsigned long npages,
+					 unsigned long len,
+					 unsigned long sram_offset,
 					 dma_addr_t *sram_addr, u64 vram_addr,
 					 const enum xe_migrate_copy_dir dir)
 {
@@ -1615,17 +1619,21 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 	struct dma_fence *fence = NULL;
 	u32 batch_size = 2;
 	u64 src_L0_ofs, dst_L0_ofs;
-	u64 round_update_size;
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
 	u32 update_idx, pt_slot = 0;
+	unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);
+	unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
+		PAGE_SIZE : 4;
 	int err;
 
-	if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER)
-		return ERR_PTR(-EINVAL);
+	if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
+			(sram_offset | vram_addr) & XE_CACHELINE_MASK))
+		return ERR_PTR(-EOPNOTSUPP);
 
-	round_update_size = npages * PAGE_SIZE;
-	batch_size += pte_update_cmd_size(round_update_size);
+	xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
+
+	batch_size += pte_update_cmd_size(len);
 	batch_size += EMIT_COPY_DW;
 
 	bb = xe_bb_new(gt, batch_size, use_usm_batch);
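For reference, a sketch of how the byte-based interface derives its geometry (values invented for illustration): a length that is an exact multiple of PAGE_SIZE copies one page per row, while any other length falls back to a 4-byte pitch, which is why the drm_WARN_ON() above insists on cacheline-aligned lengths and offsets.

	unsigned long len = 3 * PAGE_SIZE + 512;	/* cacheline-aligned, not page-aligned */
	unsigned long sram_offset = 0;
	unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);	/* 4 */
	unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
		PAGE_SIZE : 4;				/* 4-byte pitch here */
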
@@ -1635,22 +1643,21 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 	}
 
 	build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
-				   sram_addr, round_update_size);
+				   sram_addr, len + sram_offset);
 
 	if (dir == XE_MIGRATE_COPY_TO_VRAM) {
-		src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
 		dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
 
 	} else {
 		src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
-		dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
 	}
 
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 	update_idx = bb->len;
 
-	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
-		  XE_PAGE_SIZE);
+	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch);
 
 	job = xe_bb_create_migration_job(m->q, bb,
 					 xe_migrate_batch_base(m, use_usm_batch),
@@ -1698,7 +1705,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
 				     dma_addr_t *src_addr,
 				     u64 dst_addr)
 {
-	return xe_migrate_vram(m, npages, src_addr, dst_addr,
+	return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr,
 			       XE_MIGRATE_COPY_TO_VRAM);
 }
 
@@ -1719,11 +1726,192 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
 				       u64 src_addr,
 				       dma_addr_t *dst_addr)
 {
-	return xe_migrate_vram(m, npages, dst_addr, src_addr,
+	return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr,
 			       XE_MIGRATE_COPY_TO_SRAM);
 }
 
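The exported wrappers keep their page-count signatures; only the internal helper moves to a byte length plus a sub-page offset. A hedged usage sketch (m, npages, src_addr, and vram_addr are placeholders assumed to come from the caller):

	/* Push npages of already-DMA-mapped system pages into VRAM. */
	struct dma_fence *fence;

	fence = xe_migrate_to_vram(m, npages, src_addr, vram_addr);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_wait(fence, false);
	dma_fence_put(fence);
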
-#endif
+static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr,
+				 int len, int write)
+{
+	unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE);
+
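+	/*
+	 * The array is kcalloc()ed, so a zero entry marks the first page the
+	 * map loop never reached; unmap only up to that point.
+	 */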
+	for (i = 0; i < npages; ++i) {
+		if (!dma_addr[i])
+			break;
+
+		dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE,
+			       write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	}
+	kfree(dma_addr);
+}
+
+static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe,
+				      void *buf, int len, int write)
+{
+	dma_addr_t *dma_addr;
+	unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE);
+
+	dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL);
+	if (!dma_addr)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < npages; ++i) {
+		dma_addr_t addr;
+		struct page *page;
+
+		if (is_vmalloc_addr(buf))
+			page = vmalloc_to_page(buf);
+		else
+			page = virt_to_page(buf);
+
+		addr = dma_map_page(xe->drm.dev,
+				    page, 0, PAGE_SIZE,
+				    write ? DMA_TO_DEVICE :
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(xe->drm.dev, addr))
+			goto err_fault;
+
+		dma_addr[i] = addr;
+		buf += PAGE_SIZE;
+	}
+
+	return dma_addr;
+
+err_fault:
+	xe_migrate_dma_unmap(xe, dma_addr, len, write);
+	return ERR_PTR(-EFAULT);
+}
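A hedged usage sketch of the map/unmap pair (buf and len are placeholders). Note that `write` describes the direction of the BO access, so a write to the BO maps the CPU pages DMA_TO_DEVICE:

	dma_addr_t *dma_addr = xe_migrate_dma_map(xe, buf, len, 1);
	if (IS_ERR(dma_addr))
		return PTR_ERR(dma_addr);
	/* ... issue the GPU copy and wait for its fence ... */
	xe_migrate_dma_unmap(xe, dma_addr, len, 1);
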
+
+/**
+ * xe_migrate_access_memory - Access memory of a BO via GPU
+ *
+ * @m: The migration context.
+ * @bo: buffer object
+ * @offset: access offset into buffer object
+ * @buf: pointer to caller memory to read into or write from
+ * @len: length of access
+ * @write: write access
+ *
+ * Access memory of a BO via the GPU, either reading into or writing from a
+ * caller-supplied pointer. The pointer is DMA-mapped for GPU access and GPU
+ * commands are issued to copy to or from it.
+ *
+ * Returns:
+ * 0 if successful, negative error code on failure.
+ */
+int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
+			     unsigned long offset, void *buf, int len,
+			     int write)
+{
+	struct xe_tile *tile = m->tile;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_res_cursor cursor;
+	struct dma_fence *fence = NULL;
+	dma_addr_t *dma_addr;
+	unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK;
+	int bytes_left = len, current_page = 0;
+	void *orig_buf = buf;
+
+	xe_bo_assert_held(bo);
+
+	/* Use bounce buffer for small access and unaligned access */
+	if (len & XE_CACHELINE_MASK ||
+	    ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) {
+		int buf_offset = 0;
+
+		/*
+		 * Less than ideal for large unaligned access but this should
+		 * be fairly rare; can be fixed up if it becomes common.
+		 */
+		do {
+			u8 bounce[XE_CACHELINE_BYTES];
+			void *ptr = (void *)bounce;
+			int err;
+			int copy_bytes = min_t(int, bytes_left,
+					       XE_CACHELINE_BYTES -
+					       (offset & XE_CACHELINE_MASK));
+			int ptr_offset = offset & XE_CACHELINE_MASK;
+
+			err = xe_migrate_access_memory(m, bo,
+						       offset &
+						       ~XE_CACHELINE_MASK,
+						       (void *)ptr,
+						       sizeof(bounce), 0);
+			if (err)
+				return err;
+
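+			/*
+			 * The aligned cacheline is now in the bounce buffer:
+			 * for a write, splice the caller's bytes in and push
+			 * the whole line back; for a read, copy the requested
+			 * bytes out.
+			 */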
+			if (write) {
+				memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes);
+
+				err = xe_migrate_access_memory(m, bo,
+							       offset & ~XE_CACHELINE_MASK,
+							       (void *)ptr,
+							       sizeof(bounce), write);
+				if (err)
+					return err;
+			} else {
+				memcpy(buf + buf_offset, ptr + ptr_offset,
+				       copy_bytes);
+			}
+
+			bytes_left -= copy_bytes;
+			buf_offset += copy_bytes;
+			offset += copy_bytes;
+		} while (bytes_left);
+
+		return 0;
+	}
+
+	dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
+	if (IS_ERR(dma_addr))
+		return PTR_ERR(dma_addr);
+
+	xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor);
+
+	do {
+		struct dma_fence *__fence;
+		u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
+			cursor.start;
+		int current_bytes;
+
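+		/*
+		 * Clamp each blit to MAX_PREEMPTDISABLE_TRANSFER and to the
+		 * contiguous VRAM chunk the cursor currently points at.
+		 */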
+		if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER)
+			current_bytes = min_t(int, bytes_left,
+					      MAX_PREEMPTDISABLE_TRANSFER);
+		else
+			current_bytes = min_t(int, bytes_left, cursor.size);
+
+		__fence = xe_migrate_vram(m, current_bytes,
+					  (unsigned long)buf & ~PAGE_MASK,
+					  dma_addr + current_page,
+					  vram_addr, write ?
+					  XE_MIGRATE_COPY_TO_VRAM :
+					  XE_MIGRATE_COPY_TO_SRAM);
+		if (IS_ERR(__fence)) {
+			if (fence)
+				dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = __fence;
+			goto out_err;
+		}
+
+		dma_fence_put(fence);
+		fence = __fence;
+
+		buf += current_bytes;
+		offset += current_bytes;
+		current_page = (int)(buf - orig_buf) / PAGE_SIZE;
+		bytes_left -= current_bytes;
+		if (bytes_left)
+			xe_res_next(&cursor, current_bytes);
+	} while (bytes_left);
+
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+out_err:
+	xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write);
+	return IS_ERR(fence) ? PTR_ERR(fence) : 0;
+}
 
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_migrate.c"