Skip to content

Commit c4d53e5

Browse files
committed
nouveau/vmm: rewrite pte tracker using a struct and bitfields.
I want to increase the counters here and start tracking LPTs as well, as there are certain situations where userspace with mixed page sizes can cause refs/unrefs to live longer, so we need better reference counting. This should be entirely non-functional.

Reviewed-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mel Henning <mhenning@darkrefraction.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260204030208.2313241-2-airlied@gmail.com
1 parent 750817a commit c4d53e5

2 files changed

Lines changed: 31 additions & 24 deletions

File tree

drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
5353
}
5454
}
5555

56-
if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
56+
if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
5757
return NULL;
5858
pgt->page = page ? page->shift : 0;
5959
pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
208208
*/
209209
for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
210210
const u32 pten = min(sptn - spti, ptes);
211-
pgt->pte[lpti] -= pten;
211+
pgt->pte[lpti].s.sptes -= pten;
212212
ptes -= pten;
213213
}
214214

@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
218218

219219
for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
220220
/* Skip over any LPTEs that still have valid SPTEs. */
221-
if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
221+
if (pgt->pte[pteb].s.sptes) {
222222
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
223-
if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
223+
if (!(pgt->pte[ptei].s.sptes))
224224
break;
225225
}
226226
continue;
@@ -232,14 +232,14 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
232232
*
233233
* Determine how many LPTEs need to transition state.
234234
*/
235-
pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
235+
pgt->pte[ptei].s.spte_valid = false;
236236
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
237-
if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
237+
if (pgt->pte[ptei].s.sptes)
238238
break;
239-
pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
239+
pgt->pte[ptei].s.spte_valid = false;
240240
}
241241

242-
if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
242+
if (pgt->pte[pteb].s.sparse) {
243243
TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
244244
pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
245245
} else
@@ -307,7 +307,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
307307
*/
308308
for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
309309
const u32 pten = min(sptn - spti, ptes);
310-
pgt->pte[lpti] += pten;
310+
pgt->pte[lpti].s.sptes += pten;
311311
ptes -= pten;
312312
}
313313

@@ -317,9 +317,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
317317

318318
for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
319319
/* Skip over any LPTEs that already have valid SPTEs. */
320-
if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
320+
if (pgt->pte[pteb].s.spte_valid) {
321321
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
322-
if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
322+
if (!pgt->pte[ptei].s.spte_valid)
323323
break;
324324
}
325325
continue;
@@ -331,14 +331,14 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
331331
*
332332
* Determine how many LPTEs need to transition state.
333333
*/
334-
pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
334+
pgt->pte[ptei].s.spte_valid = true;
335335
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
336-
if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
336+
if (pgt->pte[ptei].s.spte_valid)
337337
break;
338-
pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
338+
pgt->pte[ptei].s.spte_valid = true;
339339
}
340340

341-
if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
341+
if (pgt->pte[pteb].s.sparse) {
342342
const u32 spti = pteb * sptn;
343343
const u32 sptc = ptes * sptn;
344344
/* The entire LPTE is marked as sparse, we need
@@ -386,7 +386,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
386386
pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
387387
} else
388388
if (desc->type == LPT) {
389-
memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
389+
union nvkm_pte_tracker sparse = { .s.sparse = 1 };
390+
memset(&pgt->pte[ptei].u, sparse.u, ptes);
390391
}
391392
}
392393

@@ -398,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
398399
memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
399400
else
400401
if (it->desc->type == LPT)
401-
memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
402+
memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes);
402403
return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
403404
}
404405

@@ -445,9 +446,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
445446
* the SPTEs on some GPUs.
446447
*/
447448
for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
448-
bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
449+
bool spte = !!pgt->pte[ptei].s.sptes;
449450
for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
450-
bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
451+
bool next = !!pgt->pte[ptei].s.sptes;
451452
if (spte != next)
452453
break;
453454
}
@@ -461,7 +462,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
461462
} else {
462463
desc->func->unmap(vmm, pt, pteb, ptes);
463464
while (ptes--)
464-
pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
465+
pgt->pte[pteb++].s.spte_valid = true;
465466
}
466467
}
467468
} else {

drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@
44
#include <core/memory.h>
55
enum nvkm_memory_target;
66

7+
union nvkm_pte_tracker {
8+
u8 u;
9+
struct {
10+
u8 sparse:1;
11+
u8 spte_valid:1;
12+
u8 sptes:6;
13+
} s;
14+
};
15+
716
struct nvkm_vmm_pt {
817
/* Some GPUs have a mapping level with a dual page tables to
918
* support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
4453
*
4554
* This information is used to manage LPTE state transitions.
4655
*/
47-
#define NVKM_VMM_PTE_SPARSE 0x80
48-
#define NVKM_VMM_PTE_VALID 0x40
49-
#define NVKM_VMM_PTE_SPTES 0x3f
50-
u8 pte[];
56+
union nvkm_pte_tracker pte[];
5157
};
5258

5359
typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,

0 commit comments

Comments (0)