@@ -119,28 +119,40 @@ static void svm_range_remove_notifier(struct svm_range *prange)
119119}
120120
121121static int
122- svm_range_dma_map_dev (struct device * dev , dma_addr_t * * dma_addr ,
123- unsigned long * hmm_pfns , uint64_t npages )
122+ svm_range_dma_map_dev (struct amdgpu_device * adev , struct svm_range * prange ,
123+ unsigned long * hmm_pfns , uint32_t gpuidx )
124124{
125125 enum dma_data_direction dir = DMA_BIDIRECTIONAL ;
126- dma_addr_t * addr = * dma_addr ;
126+ dma_addr_t * addr = prange -> dma_addr [gpuidx ];
127+ struct device * dev = adev -> dev ;
127128 struct page * page ;
128129 int i , r ;
129130
130131 if (!addr ) {
131- addr = kvmalloc_array (npages , sizeof (* addr ),
132+ addr = kvmalloc_array (prange -> npages , sizeof (* addr ),
132133 GFP_KERNEL | __GFP_ZERO );
133134 if (!addr )
134135 return - ENOMEM ;
135- * dma_addr = addr ;
136+ prange -> dma_addr [ gpuidx ] = addr ;
136137 }
137138
138- for (i = 0 ; i < npages ; i ++ ) {
139+ for (i = 0 ; i < prange -> npages ; i ++ ) {
139140 if (WARN_ONCE (addr [i ] && !dma_mapping_error (dev , addr [i ]),
140141 "leaking dma mapping\n" ))
141142 dma_unmap_page (dev , addr [i ], PAGE_SIZE , dir );
142143
143144 page = hmm_pfn_to_page (hmm_pfns [i ]);
145+ if (is_zone_device_page (page )) {
146+ struct amdgpu_device * bo_adev =
147+ amdgpu_ttm_adev (prange -> svm_bo -> bo -> tbo .bdev );
148+
149+ addr [i ] = (hmm_pfns [i ] << PAGE_SHIFT ) +
150+ bo_adev -> vm_manager .vram_base_offset -
151+ bo_adev -> kfd .dev -> pgmap .range .start ;
152+ addr [i ] |= SVM_RANGE_VRAM_DOMAIN ;
153+ pr_debug ("vram address detected: 0x%llx\n" , addr [i ]);
154+ continue ;
155+ }
144156 addr [i ] = dma_map_page (dev , page , 0 , PAGE_SIZE , dir );
145157 r = dma_mapping_error (dev , addr [i ]);
146158 if (r ) {
@@ -175,8 +187,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
175187 }
176188 adev = (struct amdgpu_device * )pdd -> dev -> kgd ;
177189
178- r = svm_range_dma_map_dev (adev -> dev , & prange -> dma_addr [gpuidx ],
179- hmm_pfns , prange -> npages );
190+ r = svm_range_dma_map_dev (adev , prange , hmm_pfns , gpuidx );
180191 if (r )
181192 break ;
182193 }
@@ -1020,21 +1031,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
10201031}
10211032
10221033static uint64_t
1023- svm_range_get_pte_flags (struct amdgpu_device * adev , struct svm_range * prange )
1034+ svm_range_get_pte_flags (struct amdgpu_device * adev , struct svm_range * prange ,
1035+ int domain )
10241036{
10251037 struct amdgpu_device * bo_adev ;
10261038 uint32_t flags = prange -> flags ;
10271039 uint32_t mapping_flags = 0 ;
10281040 uint64_t pte_flags ;
1029- bool snoop = ! prange -> ttm_res ;
1041+ bool snoop = ( domain != SVM_RANGE_VRAM_DOMAIN ) ;
10301042 bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT ;
10311043
1032- if (prange -> svm_bo && prange -> ttm_res )
1044+ if (domain == SVM_RANGE_VRAM_DOMAIN )
10331045 bo_adev = amdgpu_ttm_adev (prange -> svm_bo -> bo -> tbo .bdev );
10341046
10351047 switch (adev -> asic_type ) {
10361048 case CHIP_ARCTURUS :
1037- if (prange -> svm_bo && prange -> ttm_res ) {
1049+ if (domain == SVM_RANGE_VRAM_DOMAIN ) {
10381050 if (bo_adev == adev ) {
10391051 mapping_flags |= coherent ?
10401052 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW ;
@@ -1050,7 +1062,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
10501062 }
10511063 break ;
10521064 case CHIP_ALDEBARAN :
1053- if (prange -> svm_bo && prange -> ttm_res ) {
1065+ if (domain == SVM_RANGE_VRAM_DOMAIN ) {
10541066 if (bo_adev == adev ) {
10551067 mapping_flags |= coherent ?
10561068 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW ;
@@ -1080,14 +1092,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
10801092 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE ;
10811093
10821094 pte_flags = AMDGPU_PTE_VALID ;
1083- pte_flags |= prange -> ttm_res ? 0 : AMDGPU_PTE_SYSTEM ;
1095+ pte_flags |= ( domain == SVM_RANGE_VRAM_DOMAIN ) ? 0 : AMDGPU_PTE_SYSTEM ;
10841096 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0 ;
10851097
10861098 pte_flags |= amdgpu_gem_va_map_flags (adev , mapping_flags );
10871099
10881100 pr_debug ("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n" ,
10891101 prange -> svms , prange -> start , prange -> last ,
1090- prange -> ttm_res ? 1 :0 , pte_flags , mapping_flags );
1102+ ( domain == SVM_RANGE_VRAM_DOMAIN ) ? 1 :0 , pte_flags , mapping_flags );
10911103
10921104 return pte_flags ;
10931105}
@@ -1158,31 +1170,41 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
11581170 struct amdgpu_bo_va bo_va ;
11591171 bool table_freed = false;
11601172 uint64_t pte_flags ;
1173+ unsigned long last_start ;
1174+ int last_domain ;
11611175 int r = 0 ;
1176+ int64_t i ;
11621177
11631178 pr_debug ("svms 0x%p [0x%lx 0x%lx]\n" , prange -> svms , prange -> start ,
11641179 prange -> last );
11651180
1166- if (prange -> svm_bo && prange -> ttm_res ) {
1181+ if (prange -> svm_bo && prange -> ttm_res )
11671182 bo_va .is_xgmi = amdgpu_xgmi_same_hive (adev , bo_adev );
1168- prange -> mapping .bo_va = & bo_va ;
1169- }
11701183
1171- prange -> mapping .start = prange -> start ;
1172- prange -> mapping .last = prange -> last ;
1173- prange -> mapping .offset = prange -> ttm_res ? prange -> offset : 0 ;
1174- pte_flags = svm_range_get_pte_flags (adev , prange );
1184+ last_start = prange -> start ;
1185+ for (i = 0 ; i < prange -> npages ; i ++ ) {
1186+ last_domain = dma_addr [i ] & SVM_RANGE_VRAM_DOMAIN ;
1187+ dma_addr [i ] &= ~SVM_RANGE_VRAM_DOMAIN ;
1188+ if ((prange -> start + i ) < prange -> last &&
1189+ last_domain == (dma_addr [i + 1 ] & SVM_RANGE_VRAM_DOMAIN ))
1190+ continue ;
11751191
1176- r = amdgpu_vm_bo_update_mapping (adev , bo_adev , vm , false, false, NULL ,
1177- prange -> mapping .start ,
1178- prange -> mapping .last , pte_flags ,
1179- prange -> mapping .offset ,
1180- prange -> ttm_res ,
1181- dma_addr , & vm -> last_update ,
1182- & table_freed );
1183- if (r ) {
1184- pr_debug ("failed %d to map to gpu 0x%lx\n" , r , prange -> start );
1185- goto out ;
1192+ pr_debug ("Mapping range [0x%lx 0x%llx] on domain: %s\n" ,
1193+ last_start , prange -> start + i , last_domain ? "GPU" : "CPU" );
1194+ pte_flags = svm_range_get_pte_flags (adev , prange , last_domain );
1195+ r = amdgpu_vm_bo_update_mapping (adev , bo_adev , vm , false, false, NULL ,
1196+ last_start ,
1197+ prange -> start + i , pte_flags ,
1198+ last_start - prange -> start ,
1199+ NULL ,
1200+ dma_addr ,
1201+ & vm -> last_update ,
1202+ & table_freed );
1203+ if (r ) {
1204+ pr_debug ("failed %d to map to gpu 0x%lx\n" , r , prange -> start );
1205+ goto out ;
1206+ }
1207+ last_start = prange -> start + i + 1 ;
11861208 }
11871209
11881210 r = amdgpu_vm_update_pdes (adev , vm , false);
@@ -1203,7 +1225,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
12031225 p -> pasid , TLB_FLUSH_LEGACY );
12041226 }
12051227out :
1206- prange -> mapping .bo_va = NULL ;
12071228 return r ;
12081229}
12091230
0 commit comments