
Commit 8a7f77f

drm/i915: Implement GGTT update method with MI_UPDATE_GTT
Implement the GGTT update method with the blitter command MI_UPDATE_GTT, and install those handlers if a platform requires them.

v2: Make sure we hold the GT wakeref and the blitter engine wakeref before we call mutex_lock()/intel_context_enter() below. When the GT/engine are not awake, intel_context_enter() calls into a runtime-pm function that can end up in kmalloc()/fs_reclaim. Triggering fs_reclaim while holding a mutex is not allowed, because the shrinker can also try to take the same mutex: a circular lock. So take the GT/blitter engine wakerefs before calling mutex_lock(), fixing the circular lock.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Acked-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230926083742.14740-6-nirmoy.das@intel.com
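The v2 note boils down to a lock-ordering rule: anything that may allocate (and so enter fs_reclaim) must happen before the timeline mutex is taken, since the shrinker can contend on that same mutex. Below is a minimal userspace sketch of that rule, with a pthread mutex standing in for the timeline mutex and malloc() standing in for the allocation a wakeref 0->1 transition may perform; all names are illustrative, not i915 API.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t timeline_mutex = PTHREAD_MUTEX_INITIALIZER;

/* May allocate on the 0 -> 1 transition, like a wakeref resume path. */
static void wakeref_get(int *count)
{
        if ((*count)++ == 0)
                free(malloc(64));       /* allocation => may enter reclaim */
}

static void bind_ptes(void)
{
        static int gt_wakeref, engine_wakeref;

        /*
         * Take both wakerefs *before* the mutex: any reclaim these
         * allocations trigger runs while the mutex is still free, so a
         * shrinker that needs the same mutex cannot deadlock against us.
         */
        wakeref_get(&gt_wakeref);
        wakeref_get(&engine_wakeref);

        pthread_mutex_lock(&timeline_mutex);
        /* ... build and queue the MI_UPDATE_GTT request here ... */
        pthread_mutex_unlock(&timeline_mutex);
}

int main(void)
{
        bind_ptes();
        return 0;
}

Consistent with this, the only allocation the patch performs inside the critical section uses GFP_NOWAIT | GFP_ATOMIC, which avoids direct reclaim.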
1 parent 3f5f628 commit 8a7f77f

1 file changed

Lines changed: 235 additions & 0 deletions

File tree

drivers/gpu/drm/i915/gt/intel_ggtt.c

@@ -15,18 +15,23 @@
 #include "display/intel_display.h"
 #include "gem/i915_gem_lmem.h"
 
+#include "intel_context.h"
 #include "intel_ggtt_gmch.h"
+#include "intel_gpu_commands.h"
 #include "intel_gt.h"
 #include "intel_gt_regs.h"
 #include "intel_pci_config.h"
+#include "intel_ring.h"
 #include "i915_drv.h"
 #include "i915_pci.h"
+#include "i915_request.h"
 #include "i915_scatterlist.h"
 #include "i915_utils.h"
 #include "i915_vgpu.h"
 
 #include "intel_gtt.h"
 #include "gen8_ppgtt.h"
+#include "intel_engine_pm.h"
 
 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
                                    unsigned long color,
@@ -252,6 +257,145 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
         return pte;
 }
 
+static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
+{
+        struct intel_gt *gt = ggtt->vm.gt;
+
+        return intel_gt_is_bind_context_ready(gt);
+}
+
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+{
+        struct intel_context *ce;
+        struct intel_gt *gt = ggtt->vm.gt;
+
+        if (intel_gt_is_wedged(gt))
+                return NULL;
+
+        ce = gt->engine[BCS0]->bind_context;
+        GEM_BUG_ON(!ce);
+
+        /*
+         * If the GT is not awake already at this stage then fall back
+         * to the PCI-based GGTT update, otherwise __intel_wakeref_get_first()
+         * would conflict with fs_reclaim trying to allocate memory while
+         * doing rpm_resume().
+         */
+        if (!intel_gt_pm_get_if_awake(gt))
+                return NULL;
+
+        intel_engine_pm_get(ce->engine);
+
+        return ce;
+}
+
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+{
+        intel_engine_pm_put(ce->engine);
+        intel_gt_pm_put(ce->engine->gt);
+}
+
+static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
+                                struct sg_table *pages, u32 num_entries,
+                                const gen8_pte_t pte)
+{
+        struct i915_sched_attr attr = {};
+        struct intel_gt *gt = ggtt->vm.gt;
+        const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
+        struct sgt_iter iter;
+        struct i915_request *rq;
+        struct intel_context *ce;
+        u32 *cs;
+
+        if (!num_entries)
+                return true;
+
+        ce = gen8_ggtt_bind_get_ce(ggtt);
+        if (!ce)
+                return false;
+
+        if (pages)
+                iter = __sgt_iter(pages->sgl, true);
+
+        while (num_entries) {
+                int count = 0;
+                dma_addr_t addr;
+                /*
+                 * MI_UPDATE_GTT can update 512 entries in a single command
+                 * but that ends up with an engine reset; 511 works.
+                 */
+                u32 n_ptes = min_t(u32, 511, num_entries);
+
+                if (mutex_lock_interruptible(&ce->timeline->mutex))
+                        goto put_ce;
+
+                intel_context_enter(ce);
+                rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
+                intel_context_exit(ce);
+                if (IS_ERR(rq)) {
+                        GT_TRACE(gt, "Failed to get bind request\n");
+                        mutex_unlock(&ce->timeline->mutex);
+                        goto put_ce;
+                }
+
+                cs = intel_ring_begin(rq, 2 * n_ptes + 2);
+                if (IS_ERR(cs)) {
+                        GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
+                        i915_request_set_error_once(rq, PTR_ERR(cs));
+                        /* Once a request is created, it must be queued */
+                        goto queue_err_rq;
+                }
+
+                *cs++ = MI_UPDATE_GTT | (2 * n_ptes);
+                *cs++ = offset << 12;
+
+                if (pages) {
+                        for_each_sgt_daddr_next(addr, iter) {
+                                if (count == n_ptes)
+                                        break;
+                                *cs++ = lower_32_bits(pte | addr);
+                                *cs++ = upper_32_bits(pte | addr);
+                                count++;
+                        }
+                        /* Fill the remaining entries with scratch PTEs, if any */
+                        if (count < n_ptes) {
+                                memset64((u64 *)cs, scratch_pte,
+                                         n_ptes - count);
+                                cs += (n_ptes - count) * 2;
+                        }
+                } else {
+                        memset64((u64 *)cs, pte, n_ptes);
+                        cs += n_ptes * 2;
+                }
+
+                intel_ring_advance(rq, cs);
+queue_err_rq:
+                i915_request_get(rq);
+                __i915_request_commit(rq);
+                __i915_request_queue(rq, &attr);
+
+                mutex_unlock(&ce->timeline->mutex);
+                /* This will break if the request is complete or after engine reset */
+                i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+                if (rq->fence.error)
+                        goto err_rq;
+
+                i915_request_put(rq);
+
+                num_entries -= n_ptes;
+                offset += n_ptes;
+        }
+
+        gen8_ggtt_bind_put_ce(ce);
+        return true;
+
+err_rq:
+        i915_request_put(rq);
+put_ce:
+        gen8_ggtt_bind_put_ce(ce);
+        return false;
+}
+
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 {
         writeq(pte, addr);
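
The command stream gen8_ggtt_bind_ptes() builds is simple: a header dword carrying the dword-pair count, a dword with the target offset (shifted by 12, as in the patch), then one 64-bit PTE per entry split into low/high dwords. Below is a standalone sketch of that packing; the opcode constant is assumed for illustration, and the real encoding lives in intel_gpu_commands.h.

#include <stdint.h>
#include <stdio.h>

#define MI_UPDATE_GTT_SKETCH    (0x23u << 23)   /* assumed opcode encoding */

/* Pack one MI_UPDATE_GTT command; returns the advanced write pointer. */
static uint32_t *emit_update_gtt(uint32_t *cs, uint32_t offset,
                                 const uint64_t *ptes, uint32_t n_ptes)
{
        uint32_t i;

        *cs++ = MI_UPDATE_GTT_SKETCH | (2 * n_ptes);    /* dword-pair count */
        *cs++ = offset << 12;   /* first entry, shifted as in the patch */

        for (i = 0; i < n_ptes; i++) {
                *cs++ = (uint32_t)ptes[i];              /* lower_32_bits() */
                *cs++ = (uint32_t)(ptes[i] >> 32);      /* upper_32_bits() */
        }
        return cs;
}

int main(void)
{
        uint64_t ptes[2] = { 0x1000 | 1, 0x2000 | 1 };
        uint32_t ring[2 * 2 + 2];       /* 2 * n_ptes + 2, as reserved above */
        uint32_t *end = emit_update_gtt(ring, 0x100, ptes, 2);

        printf("emitted %zu dwords\n", (size_t)(end - ring));
        return 0;
}

This also makes the ring reservation in the patch easy to read: 2 * n_ptes + 2 dwords per batch, with the 511-entry cap noted in the comment above.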
@@ -272,6 +416,21 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
         ggtt->invalidate(ggtt);
 }
 
+static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
+                                       dma_addr_t addr, u64 offset,
+                                       unsigned int pat_index, u32 flags)
+{
+        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+        gen8_pte_t pte;
+
+        pte = ggtt->vm.pte_encode(addr, pat_index, flags);
+        if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+            gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
+                return ggtt->invalidate(ggtt);
+
+        gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma_resource *vma_res,
                                      unsigned int pat_index,
@@ -311,6 +470,50 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
         ggtt->invalidate(ggtt);
 }
 
+static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+                                            struct i915_vma_resource *vma_res,
+                                            unsigned int pat_index, u32 flags)
+{
+        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+        gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+        gen8_pte_t pte_encode;
+        u64 start, end;
+
+        pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
+        start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+        end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
+        if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+                goto err;
+
+        start = end;
+        end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
+        if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
+                                 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
+                goto err;
+
+        start += vma_res->node_size / I915_GTT_PAGE_SIZE;
+        if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+                goto err;
+
+        return true;
+
+err:
+        return false;
+}
+
+static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+                                          struct i915_vma_resource *vma_res,
+                                          unsigned int pat_index, u32 flags)
+{
+        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+        if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+            __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
+                return ggtt->invalidate(ggtt);
+
+        gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
+}
+
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
                                   u64 start, u64 length)
 {
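
__gen8_ggtt_insert_entries_bind() walks the VMA as three consecutive GGTT ranges: a leading guard bound to the scratch page, the body bound to the real pages, and a trailing guard bound to scratch again. Below is a minimal standalone sketch of that arithmetic; the sizes are made up for illustration and are not taken from i915.

#include <stdint.h>
#include <stdio.h>

#define GTT_PAGE_SIZE 4096ull

int main(void)
{
        /* Made-up VMA: 32 KiB of pages with an 8 KiB guard on each side. */
        uint64_t vma_start = 0x100000, node_size = 0x8000, guard = 0x2000;
        uint64_t start, end;

        /* Leading guard: scratch PTEs. */
        start = (vma_start - guard) / GTT_PAGE_SIZE;
        end = start + guard / GTT_PAGE_SIZE;
        printf("scratch: entries [%llu, %llu)\n",
               (unsigned long long)start, (unsigned long long)end);

        /* Body: real PTEs, one per page of node_size. */
        start = end;
        end += (node_size + guard) / GTT_PAGE_SIZE;
        printf("pages:   entries [%llu, %llu)\n",
               (unsigned long long)start,
               (unsigned long long)(start + node_size / GTT_PAGE_SIZE));

        /* Trailing guard: scratch PTEs up to the precomputed end. */
        start += node_size / GTT_PAGE_SIZE;
        printf("scratch: entries [%llu, %llu)\n",
               (unsigned long long)start, (unsigned long long)end);
        return 0;
}

If any of the three gen8_ggtt_bind_ptes() calls fails, the wrapper falls back to gen8_ggtt_insert_entries(), the CPU/MMIO path.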
@@ -332,6 +535,27 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
                 gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
+                                         u64 start, u64 length)
+{
+        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+        const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+
+        if (WARN(num_entries > max_entries,
+                 "First entry = %d; Num entries = %d (max=%d)\n",
+                 first_entry, num_entries, max_entries))
+                num_entries = max_entries;
+
+        if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
+            NULL, num_entries, scratch_pte))
+                return ggtt->invalidate(ggtt);
+
+        gen8_ggtt_clear_range(vm, start, length);
+}
+
 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
                                   dma_addr_t addr,
                                   u64 offset,
@@ -1008,6 +1232,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
                         I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
         }
 
+        if (i915_ggtt_require_binder(i915)) {
+                ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
+                ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
+                ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
+                /*
+                 * If the GPU is hung, we might bind VMAs for error capture.
+                 * Fall back to CPU GGTT updates in that case.
+                 */
+                ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
+        }
+
         if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
                 ggtt->invalidate = guc_ggtt_invalidate;
         else
