Skip to content

Commit bb8aa27

Browse files
author
Thomas Hellström
committed
drm/ttm, drm/xe: Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
To avoid duplicating the tricky bo locking implementation, implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration. To facilitate this, support ticketlocking from the guarded bo LRU iteration. v2: - Clean up some static function interfaces (Christian König) - Fix handling of -EALREADY from ticketlocking in the loop by skipping to the next item. (Intel CI) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Christian König <christian.koenig@amd.com> Link: https://lore.kernel.org/r/20250623155313.4901-4-thomas.hellstrom@linux.intel.com
1 parent e1e85eb commit bb8aa27

3 files changed

Lines changed: 88 additions & 116 deletions

File tree

drivers/gpu/drm/ttm/ttm_bo_util.c

Lines changed: 75 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -773,16 +773,15 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
773773
return ret;
774774
}
775775

776-
static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
777-
struct ttm_buffer_object *bo,
778-
bool *needs_unlock)
776+
static bool ttm_lru_walk_trylock(struct ttm_bo_lru_cursor *curs,
777+
struct ttm_buffer_object *bo)
779778
{
780-
struct ttm_operation_ctx *ctx = arg->ctx;
779+
struct ttm_operation_ctx *ctx = curs->arg->ctx;
781780

782-
*needs_unlock = false;
781+
curs->needs_unlock = false;
783782

784783
if (dma_resv_trylock(bo->base.resv)) {
785-
*needs_unlock = true;
784+
curs->needs_unlock = true;
786785
return true;
787786
}
788787

@@ -794,10 +793,10 @@ static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
794793
return false;
795794
}
796795

797-
static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
798-
struct ttm_buffer_object *bo,
799-
bool *needs_unlock)
796+
static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs,
797+
struct ttm_buffer_object *bo)
800798
{
799+
struct ttm_lru_walk_arg *arg = curs->arg;
801800
struct dma_resv *resv = bo->base.resv;
802801
int ret;
803802

@@ -807,7 +806,7 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
807806
ret = dma_resv_lock(resv, arg->ticket);
808807

809808
if (!ret) {
810-
*needs_unlock = true;
809+
curs->needs_unlock = true;
811810
/*
812811
* Only a single ticketlock per loop. Ticketlocks are prone
813812
* to return -EDEADLK causing the eviction to fail, so
@@ -823,12 +822,6 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
823822
return ret;
824823
}
825824

826-
static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
827-
{
828-
if (locked)
829-
dma_resv_unlock(bo->base.resv);
830-
}
831-
832825
/**
833826
* ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
834827
* valid items.
@@ -863,64 +856,21 @@ static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
863856
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
864857
struct ttm_resource_manager *man, s64 target)
865858
{
866-
struct ttm_resource_cursor cursor;
867-
struct ttm_resource *res;
859+
struct ttm_bo_lru_cursor cursor;
860+
struct ttm_buffer_object *bo;
868861
s64 progress = 0;
869862
s64 lret;
870863

871-
spin_lock(&bdev->lru_lock);
872-
ttm_resource_cursor_init(&cursor, man);
873-
ttm_resource_manager_for_each_res(&cursor, res) {
874-
struct ttm_buffer_object *bo = res->bo;
875-
bool bo_needs_unlock = false;
876-
bool bo_locked = false;
877-
int mem_type;
878-
879-
/*
880-
* Attempt a trylock before taking a reference on the bo,
881-
* since if we do it the other way around, and the trylock fails,
882-
* we need to drop the lru lock to put the bo.
883-
*/
884-
if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
885-
bo_locked = true;
886-
else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
887-
walk->arg.trylock_only)
888-
continue;
889-
890-
if (!ttm_bo_get_unless_zero(bo)) {
891-
ttm_lru_walk_unlock(bo, bo_needs_unlock);
892-
continue;
893-
}
894-
895-
mem_type = res->mem_type;
896-
spin_unlock(&bdev->lru_lock);
897-
898-
lret = 0;
899-
if (!bo_locked)
900-
lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);
901-
902-
/*
903-
* Note that in between the release of the lru lock and the
904-
* ticketlock, the bo may have switched resource,
905-
* and also memory type, since the resource may have been
906-
* freed and allocated again with a different memory type.
907-
* In that case, just skip it.
908-
*/
909-
if (!lret && bo->resource && bo->resource->mem_type == mem_type)
910-
lret = walk->ops->process_bo(walk, bo);
911-
912-
ttm_lru_walk_unlock(bo, bo_needs_unlock);
913-
ttm_bo_put(bo);
864+
ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
865+
lret = walk->ops->process_bo(walk, bo);
914866
if (lret == -EBUSY || lret == -EALREADY)
915867
lret = 0;
916868
progress = (lret < 0) ? lret : progress + lret;
917-
918-
spin_lock(&bdev->lru_lock);
919869
if (progress < 0 || progress >= target)
920870
break;
921871
}
922-
ttm_resource_cursor_fini(&cursor);
923-
spin_unlock(&bdev->lru_lock);
872+
if (IS_ERR(bo))
873+
return PTR_ERR(bo);
924874

925875
return progress;
926876
}
@@ -960,10 +910,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
960910
* @man: The ttm resource_manager whose LRU lists to iterate over.
961911
* @arg: The ttm_lru_walk_arg to govern the walk.
962912
*
963-
* Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
964-
* or prelocked buffer objects are available as detailed by
965-
* @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
966-
* supported.
913+
* Initialize a struct ttm_bo_lru_cursor.
967914
*
968915
* Return: Pointer to @curs. The function does not fail.
969916
*/
@@ -981,21 +928,67 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
981928
EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
982929

983930
static struct ttm_buffer_object *
984-
ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *curs)
931+
__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
985932
{
986-
struct ttm_buffer_object *bo = res->bo;
933+
spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
934+
struct ttm_resource *res = NULL;
935+
struct ttm_buffer_object *bo;
936+
struct ttm_lru_walk_arg *arg = curs->arg;
937+
bool first = !curs->bo;
987938

988-
if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
989-
return NULL;
939+
ttm_bo_lru_cursor_cleanup_bo(curs);
990940

991-
if (!ttm_bo_get_unless_zero(bo)) {
992-
if (curs->needs_unlock)
993-
dma_resv_unlock(bo->base.resv);
994-
return NULL;
941+
spin_lock(lru_lock);
942+
for (;;) {
943+
int mem_type, ret = 0;
944+
bool bo_locked = false;
945+
946+
if (first) {
947+
res = ttm_resource_manager_first(&curs->res_curs);
948+
first = false;
949+
} else {
950+
res = ttm_resource_manager_next(&curs->res_curs);
951+
}
952+
if (!res)
953+
break;
954+
955+
bo = res->bo;
956+
if (ttm_lru_walk_trylock(curs, bo))
957+
bo_locked = true;
958+
else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
959+
continue;
960+
961+
if (!ttm_bo_get_unless_zero(bo)) {
962+
if (curs->needs_unlock)
963+
dma_resv_unlock(bo->base.resv);
964+
continue;
965+
}
966+
967+
mem_type = res->mem_type;
968+
spin_unlock(lru_lock);
969+
if (!bo_locked)
970+
ret = ttm_lru_walk_ticketlock(curs, bo);
971+
972+
/*
973+
* Note that in between the release of the lru lock and the
974+
* ticketlock, the bo may have switched resource,
975+
* and also memory type, since the resource may have been
976+
* freed and allocated again with a different memory type.
977+
* In that case, just skip it.
978+
*/
979+
curs->bo = bo;
980+
if (!ret && bo->resource && bo->resource->mem_type == mem_type)
981+
return bo;
982+
983+
ttm_bo_lru_cursor_cleanup_bo(curs);
984+
if (ret && ret != -EALREADY)
985+
return ERR_PTR(ret);
986+
987+
spin_lock(lru_lock);
995988
}
996989

997-
curs->bo = bo;
998-
return bo;
990+
spin_unlock(lru_lock);
991+
return res ? bo : NULL;
999992
}
1000993

1001994
/**
@@ -1009,25 +1002,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
10091002
*/
10101003
struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
10111004
{
1012-
spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
1013-
struct ttm_resource *res = NULL;
1014-
struct ttm_buffer_object *bo;
1015-
1016-
ttm_bo_lru_cursor_cleanup_bo(curs);
1017-
1018-
spin_lock(lru_lock);
1019-
for (;;) {
1020-
res = ttm_resource_manager_next(&curs->res_curs);
1021-
if (!res)
1022-
break;
1023-
1024-
bo = ttm_bo_from_res_reserved(res, curs);
1025-
if (bo)
1026-
break;
1027-
}
1028-
1029-
spin_unlock(lru_lock);
1030-
return res ? bo : NULL;
1005+
return __ttm_bo_lru_cursor_next(curs);
10311006
}
10321007
EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
10331008

@@ -1041,21 +1016,8 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
10411016
*/
10421017
struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs)
10431018
{
1044-
spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
1045-
struct ttm_buffer_object *bo;
1046-
struct ttm_resource *res;
1047-
1048-
spin_lock(lru_lock);
1049-
res = ttm_resource_manager_first(&curs->res_curs);
1050-
if (!res) {
1051-
spin_unlock(lru_lock);
1052-
return NULL;
1053-
}
1054-
1055-
bo = ttm_bo_from_res_reserved(res, curs);
1056-
spin_unlock(lru_lock);
1057-
1058-
return bo ? bo : ttm_bo_lru_cursor_next(curs);
1019+
ttm_bo_lru_cursor_cleanup_bo(curs);
1020+
return __ttm_bo_lru_cursor_next(curs);
10591021
}
10601022
EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
10611023

drivers/gpu/drm/xe/xe_shrinker.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
6565
struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
6666
struct ttm_bo_lru_cursor curs;
6767
struct ttm_buffer_object *ttm_bo;
68-
struct ttm_lru_walk_arg arg = {.ctx = ctx};
68+
struct ttm_lru_walk_arg arg = {
69+
.ctx = ctx,
70+
.trylock_only = true,
71+
};
6972

7073
if (!man || !man->use_tt)
7174
continue;
@@ -82,6 +85,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
8285
if (*scanned >= to_scan)
8386
break;
8487
}
88+
/* Trylocks should never error, just fail. */
89+
xe_assert(xe, !IS_ERR(ttm_bo));
8590
}
8691

8792
return freed;

include/drm/ttm/ttm_bo.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,10 +529,15 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
529529
* up at looping termination, even if terminated prematurely by, for
530530
* example a return or break statement. Exiting the loop will also unlock
531531
* (if needed) and unreference @_bo.
532+
*
533+
* Return: If locking of a bo returns an error, then iteration is terminated
534+
* and @_bo is set to a corresponding error pointer. It's illegal to
535+
* dereference @_bo after loop exit.
532536
*/
533537
#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \
534538
scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \
535-
for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \
536-
(_bo) = ttm_bo_lru_cursor_next(_cursor))
539+
for ((_bo) = ttm_bo_lru_cursor_first(_cursor); \
540+
!IS_ERR_OR_NULL(_bo); \
541+
(_bo) = ttm_bo_lru_cursor_next(_cursor))
537542

538543
#endif

0 commit comments

Comments
 (0)