Skip to content

Commit 5bde069

Browse files
committed
Merge tag 'drm-intel-fixes-2022-07-13' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Selftest fix (Andrzej)
- TTM fix sg_table construction (Matt Auld)
- Error return fixes (Dan)
- Fix a performance regression related to waitboost (Chris)
- Fix GT resets (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Ys87yMujcG2sJC1R@intel.com
2 parents b1f4347 + 333991c commit 5bde069

13 files changed

Lines changed: 142 additions & 33 deletions

drivers/gpu/drm/i915/gem/i915_gem_ttm.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
620620
struct ttm_resource *res)
621621
{
622622
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
623+
u64 page_alignment;
623624

624625
if (!i915_ttm_gtt_binds_lmem(res))
625626
return i915_ttm_tt_get_st(bo->ttm);
626627

628+
page_alignment = bo->page_alignment << PAGE_SHIFT;
629+
if (!page_alignment)
630+
page_alignment = obj->mm.region->min_page_size;
631+
627632
/*
628633
* If CPU mapping differs, we need to add the ttm_tt pages to
629634
* the resulting st. Might make sense for GGTT.
@@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
634639
struct i915_refct_sgt *rsgt;
635640

636641
rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
637-
res);
642+
res,
643+
page_alignment);
638644
if (IS_ERR(rsgt))
639645
return rsgt;
640646

@@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
643649
return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
644650
}
645651

646-
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
652+
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res,
653+
page_alignment);
647654
}
648655

649656
static int i915_ttm_truncate(struct drm_i915_gem_object *obj)

drivers/gpu/drm/i915/gem/i915_gem_wait.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/jiffies.h>
1010

1111
#include "gt/intel_engine.h"
12+
#include "gt/intel_rps.h"
1213

1314
#include "i915_gem_ioctls.h"
1415
#include "i915_gem_object.h"
@@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
3132
timeout);
3233
}
3334

35+
static void
36+
i915_gem_object_boost(struct dma_resv *resv, unsigned int flags)
37+
{
38+
struct dma_resv_iter cursor;
39+
struct dma_fence *fence;
40+
41+
/*
42+
* Prescan all fences for potential boosting before we begin waiting.
43+
*
44+
* When we wait, we wait on outstanding fences serially. If the
45+
* dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced
46+
* form 1:2, then as we look at each wait in turn we see that each
47+
* request is currently executing and not worthy of boosting. But if
48+
* we only happen to look at the final fence in the sequence (because
49+
* of request coalescing or splitting between read/write arrays by
50+
* the iterator), then we would boost. As such our decision to boost
51+
* or not is delicately balanced on the order we wait on fences.
52+
*
53+
* So instead of looking for boosts sequentially, look for all boosts
54+
* upfront and then wait on the outstanding fences.
55+
*/
56+
57+
dma_resv_iter_begin(&cursor, resv,
58+
dma_resv_usage_rw(flags & I915_WAIT_ALL));
59+
dma_resv_for_each_fence_unlocked(&cursor, fence)
60+
if (dma_fence_is_i915(fence) &&
61+
!i915_request_started(to_request(fence)))
62+
intel_rps_boost(to_request(fence));
63+
dma_resv_iter_end(&cursor);
64+
}
65+
3466
static long
3567
i915_gem_object_wait_reservation(struct dma_resv *resv,
3668
unsigned int flags,
@@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
4072
struct dma_fence *fence;
4173
long ret = timeout ?: 1;
4274

75+
i915_gem_object_boost(resv, flags);
76+
4377
dma_resv_iter_begin(&cursor, resv,
4478
dma_resv_usage_rw(flags & I915_WAIT_ALL));
4579
dma_resv_for_each_fence_unlocked(&cursor, fence) {

drivers/gpu/drm/i915/gt/intel_gt.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
12091209
mutex_lock(&gt->tlb_invalidate_lock);
12101210
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
12111211

1212+
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
1213+
1214+
for_each_engine(engine, gt, id) {
1215+
struct reg_and_bit rb;
1216+
1217+
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
1218+
if (!i915_mmio_reg_offset(rb.reg))
1219+
continue;
1220+
1221+
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
1222+
}
1223+
1224+
spin_unlock_irq(&uncore->lock);
1225+
12121226
for_each_engine(engine, gt, id) {
12131227
/*
12141228
* HW architecture suggest typical invalidation time at 40us,
@@ -1223,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
12231237
if (!i915_mmio_reg_offset(rb.reg))
12241238
continue;
12251239

1226-
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
12271240
if (__intel_wait_for_register_fw(uncore,
12281241
rb.reg, rb.bit, 0,
12291242
timeout_us, timeout_ms,

drivers/gpu/drm/i915/gt/intel_reset.c

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
300300
return err;
301301
}
302302

303-
static int gen6_reset_engines(struct intel_gt *gt,
304-
intel_engine_mask_t engine_mask,
305-
unsigned int retry)
303+
static int __gen6_reset_engines(struct intel_gt *gt,
304+
intel_engine_mask_t engine_mask,
305+
unsigned int retry)
306306
{
307307
struct intel_engine_cs *engine;
308308
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
321321
return gen6_hw_domain_reset(gt, hw_mask);
322322
}
323323

324+
static int gen6_reset_engines(struct intel_gt *gt,
325+
intel_engine_mask_t engine_mask,
326+
unsigned int retry)
327+
{
328+
unsigned long flags;
329+
int ret;
330+
331+
spin_lock_irqsave(&gt->uncore->lock, flags);
332+
ret = __gen6_reset_engines(gt, engine_mask, retry);
333+
spin_unlock_irqrestore(&gt->uncore->lock, flags);
334+
335+
return ret;
336+
}
337+
324338
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
325339
{
326340
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
487501
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
488502
}
489503

490-
static int gen11_reset_engines(struct intel_gt *gt,
491-
intel_engine_mask_t engine_mask,
492-
unsigned int retry)
504+
static int __gen11_reset_engines(struct intel_gt *gt,
505+
intel_engine_mask_t engine_mask,
506+
unsigned int retry)
493507
{
494508
struct intel_engine_cs *engine;
495509
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
583597
struct intel_engine_cs *engine;
584598
const bool reset_non_ready = retry >= 1;
585599
intel_engine_mask_t tmp;
600+
unsigned long flags;
586601
int ret;
587602

603+
spin_lock_irqsave(&gt->uncore->lock, flags);
604+
588605
for_each_engine_masked(engine, gt, engine_mask, tmp) {
589606
ret = gen8_engine_reset_prepare(engine);
590607
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
612629
* This is best effort, so ignore any error from the initial reset.
613630
*/
614631
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
615-
gen11_reset_engines(gt, gt->info.engine_mask, 0);
632+
__gen11_reset_engines(gt, gt->info.engine_mask, 0);
616633

617634
if (GRAPHICS_VER(gt->i915) >= 11)
618-
ret = gen11_reset_engines(gt, engine_mask, retry);
635+
ret = __gen11_reset_engines(gt, engine_mask, retry);
619636
else
620-
ret = gen6_reset_engines(gt, engine_mask, retry);
637+
ret = __gen6_reset_engines(gt, engine_mask, retry);
621638

622639
skip_reset:
623640
for_each_engine_masked(engine, gt, engine_mask, tmp)
624641
gen8_engine_reset_cancel(engine);
625642

643+
spin_unlock_irqrestore(&gt->uncore->lock, flags);
644+
626645
return ret;
627646
}
628647

drivers/gpu/drm/i915/gt/selftest_lrc.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg)
176176
continue;
177177

178178
hw = shmem_pin_map(engine->default_state);
179-
if (IS_ERR(hw)) {
180-
err = PTR_ERR(hw);
179+
if (!hw) {
180+
err = -ENOMEM;
181181
break;
182182
}
183183
hw += LRC_STATE_OFFSET / sizeof(*hw);
@@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg)
365365
continue;
366366

367367
hw = shmem_pin_map(engine->default_state);
368-
if (IS_ERR(hw)) {
369-
err = PTR_ERR(hw);
368+
if (!hw) {
369+
err = -ENOMEM;
370370
break;
371371
}
372372
hw += LRC_STATE_OFFSET / sizeof(*hw);

drivers/gpu/drm/i915/gvt/cmd_parser.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
31173117
continue;
31183118

31193119
vaddr = shmem_pin_map(engine->default_state);
3120-
if (IS_ERR(vaddr)) {
3121-
gvt_err("failed to map %s->default state, err:%zd\n",
3122-
engine->name, PTR_ERR(vaddr));
3120+
if (!vaddr) {
3121+
gvt_err("failed to map %s->default state\n",
3122+
engine->name);
31233123
return;
31243124
}
31253125

drivers/gpu/drm/i915/i915_scatterlist.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
6868
* drm_mm_node
6969
* @node: The drm_mm_node.
7070
* @region_start: An offset to add to the dma addresses of the sg list.
71+
* @page_alignment: Required page alignment for each sg entry. Power of two.
7172
*
7273
* Create a struct sg_table, initializing it from a struct drm_mm_node,
7374
* taking a maximum segment length into account, splitting into segments
@@ -77,15 +78,18 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
7778
* error code cast to an error pointer on failure.
7879
*/
7980
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
80-
u64 region_start)
81+
u64 region_start,
82+
u64 page_alignment)
8183
{
82-
const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
84+
const u64 max_segment = round_down(UINT_MAX, page_alignment);
8385
u64 segment_pages = max_segment >> PAGE_SHIFT;
8486
u64 block_size, offset, prev_end;
8587
struct i915_refct_sgt *rsgt;
8688
struct sg_table *st;
8789
struct scatterlist *sg;
8890

91+
GEM_BUG_ON(!max_segment);
92+
8993
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
9094
if (!rsgt)
9195
return ERR_PTR(-ENOMEM);
@@ -112,6 +116,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
112116
sg = __sg_next(sg);
113117

114118
sg_dma_address(sg) = region_start + offset;
119+
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
120+
page_alignment));
115121
sg_dma_len(sg) = 0;
116122
sg->length = 0;
117123
st->nents++;
@@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
138144
* i915_buddy_block list
139145
* @res: The struct i915_ttm_buddy_resource.
140146
* @region_start: An offset to add to the dma addresses of the sg list.
147+
* @page_alignment: Required page alignment for each sg entry. Power of two.
141148
*
142149
* Create a struct sg_table, initializing it from struct i915_buddy_block list,
143150
* taking a maximum segment length into account, splitting into segments
@@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
147154
* error code cast to an error pointer on failure.
148155
*/
149156
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
150-
u64 region_start)
157+
u64 region_start,
158+
u64 page_alignment)
151159
{
152160
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
153161
const u64 size = res->num_pages << PAGE_SHIFT;
154-
const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
162+
const u64 max_segment = round_down(UINT_MAX, page_alignment);
155163
struct drm_buddy *mm = bman_res->mm;
156164
struct list_head *blocks = &bman_res->blocks;
157165
struct drm_buddy_block *block;
@@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
161169
resource_size_t prev_end;
162170

163171
GEM_BUG_ON(list_empty(blocks));
172+
GEM_BUG_ON(!max_segment);
164173

165174
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
166175
if (!rsgt)
@@ -191,6 +200,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
191200
sg = __sg_next(sg);
192201

193202
sg_dma_address(sg) = region_start + offset;
203+
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
204+
page_alignment));
194205
sg_dma_len(sg) = 0;
195206
sg->length = 0;
196207
st->nents++;

drivers/gpu/drm/i915/i915_scatterlist.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
213213
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
214214

215215
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
216-
u64 region_start);
216+
u64 region_start,
217+
u64 page_alignment);
217218

218219
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
219-
u64 region_start);
220+
u64 region_start,
221+
u64 page_alignment);
220222

221223
#endif

drivers/gpu/drm/i915/intel_region_ttm.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
152152
* Convert an opaque TTM resource manager resource to a refcounted sg_table.
153153
* @mem: The memory region.
154154
* @res: The resource manager resource obtained from the TTM resource manager.
155+
* @page_alignment: Required page alignment for each sg entry. Power of two.
155156
*
156157
* The gem backends typically use sg-tables for operations on the underlying
157158
* io_memory. So provide a way for the backends to translate the
@@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
161162
*/
162163
struct i915_refct_sgt *
163164
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
164-
struct ttm_resource *res)
165+
struct ttm_resource *res,
166+
u64 page_alignment)
165167
{
166168
if (mem->is_range_manager) {
167169
struct ttm_range_mgr_node *range_node =
168170
to_ttm_range_mgr_node(res);
169171

170172
return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
171-
mem->region.start);
173+
mem->region.start,
174+
page_alignment);
172175
} else {
173-
return i915_rsgt_from_buddy_resource(res, mem->region.start);
176+
return i915_rsgt_from_buddy_resource(res, mem->region.start,
177+
page_alignment);
174178
}
175179
}
176180

drivers/gpu/drm/i915/intel_region_ttm.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem);
2424

2525
struct i915_refct_sgt *
2626
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
27-
struct ttm_resource *res);
27+
struct ttm_resource *res,
28+
u64 page_alignment);
2829

2930
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
3031
struct ttm_resource *res);

0 commit comments

Comments (0)