Skip to content

Commit abceb42

Browse files
Stanislav Kinsburskii authored and liuw committed
mshv: Fix huge page handling in memory region traversal
The previous code assumed that if a region's first page was huge, the entire region consisted of huge pages and stored this in a large_pages flag. This premise is incorrect not only for movable regions (where pages can be split and merged on invalidate callbacks or page faults), but even for pinned regions: THPs can be split and merged during allocation, so a large, pinned region may contain a mix of huge and regular pages. This change removes the large_pages flag and replaces region-wide assumptions with per-chunk inspection of the actual page size when mapping, unmapping, sharing, and unsharing. This makes huge page handling correct for mixed-page regions and avoids relying on stale metadata that can easily become invalid as memory is remapped. Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> Reviewed-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com> Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> Signed-off-by: Wei Liu <wei.liu@kernel.org>
1 parent e950c30 commit abceb42

2 files changed

Lines changed: 191 additions & 31 deletions

File tree

drivers/hv/mshv_regions.c

Lines changed: 190 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,124 @@
1414

1515
#include "mshv_root.h"
1616

17+
/**
18+
* mshv_region_process_chunk - Processes a contiguous chunk of memory pages
19+
* in a region.
20+
* @region : Pointer to the memory region structure.
21+
* @flags : Flags to pass to the handler.
22+
* @page_offset: Offset into the region's pages array to start processing.
23+
* @page_count : Number of pages to process.
24+
* @handler : Callback function to handle the chunk.
25+
*
26+
* This function scans the region's pages starting from @page_offset,
27+
* checking for contiguous present pages of the same size (normal or huge).
28+
* It invokes @handler for the chunk of contiguous pages found. Returns the
29+
* number of pages handled, or a negative error code if the first page is
30+
* not present or the handler fails.
31+
*
32+
* Note: The @handler callback must be able to handle both normal and huge
33+
* pages.
34+
*
35+
* Return: Number of pages handled, or negative error code.
36+
*/
37+
static long mshv_region_process_chunk(struct mshv_mem_region *region,
38+
u32 flags,
39+
u64 page_offset, u64 page_count,
40+
int (*handler)(struct mshv_mem_region *region,
41+
u32 flags,
42+
u64 page_offset,
43+
u64 page_count))
44+
{
45+
u64 count, stride;
46+
unsigned int page_order;
47+
struct page *page;
48+
int ret;
49+
50+
page = region->pages[page_offset];
51+
if (!page)
52+
return -EINVAL;
53+
54+
page_order = folio_order(page_folio(page));
55+
/* The hypervisor only supports 4K and 2M page sizes */
56+
if (page_order && page_order != HPAGE_PMD_ORDER)
57+
return -EINVAL;
58+
59+
stride = 1 << page_order;
60+
61+
/* Start at stride since the first page is validated */
62+
for (count = stride; count < page_count; count += stride) {
63+
page = region->pages[page_offset + count];
64+
65+
/* Break if current page is not present */
66+
if (!page)
67+
break;
68+
69+
/* Break if page size changes */
70+
if (page_order != folio_order(page_folio(page)))
71+
break;
72+
}
73+
74+
ret = handler(region, flags, page_offset, count);
75+
if (ret)
76+
return ret;
77+
78+
return count;
79+
}
80+
81+
/**
82+
* mshv_region_process_range - Processes a range of memory pages in a
83+
* region.
84+
* @region : Pointer to the memory region structure.
85+
* @flags : Flags to pass to the handler.
86+
* @page_offset: Offset into the region's pages array to start processing.
87+
* @page_count : Number of pages to process.
88+
* @handler : Callback function to handle each chunk of contiguous
89+
* pages.
90+
*
91+
* Iterates over the specified range of pages in @region, skipping
92+
* non-present pages. For each contiguous chunk of present pages, invokes
93+
* @handler via mshv_region_process_chunk.
94+
*
95+
* Note: The @handler callback must be able to handle both normal and huge
96+
* pages.
97+
*
98+
* Returns 0 on success, or a negative error code on failure.
99+
*/
100+
static int mshv_region_process_range(struct mshv_mem_region *region,
101+
u32 flags,
102+
u64 page_offset, u64 page_count,
103+
int (*handler)(struct mshv_mem_region *region,
104+
u32 flags,
105+
u64 page_offset,
106+
u64 page_count))
107+
{
108+
long ret;
109+
110+
if (page_offset + page_count > region->nr_pages)
111+
return -EINVAL;
112+
113+
while (page_count) {
114+
/* Skip non-present pages */
115+
if (!region->pages[page_offset]) {
116+
page_offset++;
117+
page_count--;
118+
continue;
119+
}
120+
121+
ret = mshv_region_process_chunk(region, flags,
122+
page_offset,
123+
page_count,
124+
handler);
125+
if (ret < 0)
126+
return ret;
127+
128+
page_offset += ret;
129+
page_count -= ret;
130+
}
131+
132+
return 0;
133+
}
134+
17135
struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
18136
u64 uaddr, u32 flags,
19137
bool is_mmio)
@@ -33,55 +151,86 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
33151
if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE))
34152
region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE;
35153

36-
/* Note: large_pages flag populated when we pin the pages */
37154
if (!is_mmio)
38155
region->flags.range_pinned = true;
39156

40157
return region;
41158
}
42159

160+
static int mshv_region_chunk_share(struct mshv_mem_region *region,
161+
u32 flags,
162+
u64 page_offset, u64 page_count)
163+
{
164+
struct page *page = region->pages[page_offset];
165+
166+
if (PageHuge(page) || PageTransCompound(page))
167+
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
168+
169+
return hv_call_modify_spa_host_access(region->partition->pt_id,
170+
region->pages + page_offset,
171+
page_count,
172+
HV_MAP_GPA_READABLE |
173+
HV_MAP_GPA_WRITABLE,
174+
flags, true);
175+
}
176+
43177
int mshv_region_share(struct mshv_mem_region *region)
44178
{
45179
u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED;
46180

47-
if (region->flags.large_pages)
181+
return mshv_region_process_range(region, flags,
182+
0, region->nr_pages,
183+
mshv_region_chunk_share);
184+
}
185+
186+
static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
187+
u32 flags,
188+
u64 page_offset, u64 page_count)
189+
{
190+
struct page *page = region->pages[page_offset];
191+
192+
if (PageHuge(page) || PageTransCompound(page))
48193
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
49194

50195
return hv_call_modify_spa_host_access(region->partition->pt_id,
51-
region->pages, region->nr_pages,
52-
HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE,
53-
flags, true);
196+
region->pages + page_offset,
197+
page_count, 0,
198+
flags, false);
54199
}
55200

56201
int mshv_region_unshare(struct mshv_mem_region *region)
57202
{
58203
u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE;
59204

60-
if (region->flags.large_pages)
61-
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
62-
63-
return hv_call_modify_spa_host_access(region->partition->pt_id,
64-
region->pages, region->nr_pages,
65-
0,
66-
flags, false);
205+
return mshv_region_process_range(region, flags,
206+
0, region->nr_pages,
207+
mshv_region_chunk_unshare);
67208
}
68209

69-
static int mshv_region_remap_pages(struct mshv_mem_region *region,
70-
u32 map_flags,
210+
static int mshv_region_chunk_remap(struct mshv_mem_region *region,
211+
u32 flags,
71212
u64 page_offset, u64 page_count)
72213
{
73-
if (page_offset + page_count > region->nr_pages)
74-
return -EINVAL;
214+
struct page *page = region->pages[page_offset];
75215

76-
if (region->flags.large_pages)
77-
map_flags |= HV_MAP_GPA_LARGE_PAGE;
216+
if (PageHuge(page) || PageTransCompound(page))
217+
flags |= HV_MAP_GPA_LARGE_PAGE;
78218

79219
return hv_call_map_gpa_pages(region->partition->pt_id,
80220
region->start_gfn + page_offset,
81-
page_count, map_flags,
221+
page_count, flags,
82222
region->pages + page_offset);
83223
}
84224

225+
static int mshv_region_remap_pages(struct mshv_mem_region *region,
226+
u32 map_flags,
227+
u64 page_offset, u64 page_count)
228+
{
229+
return mshv_region_process_range(region, map_flags,
230+
page_offset, page_count,
231+
mshv_region_chunk_remap);
232+
}
233+
85234
int mshv_region_map(struct mshv_mem_region *region)
86235
{
87236
u32 map_flags = region->hv_map_flags;
@@ -134,20 +283,37 @@ int mshv_region_pin(struct mshv_mem_region *region)
134283
goto release_pages;
135284
}
136285

137-
if (PageHuge(region->pages[0]))
138-
region->flags.large_pages = true;
139-
140286
return 0;
141287

142288
release_pages:
143289
mshv_region_invalidate_pages(region, 0, done_count);
144290
return ret;
145291
}
146292

293+
static int mshv_region_chunk_unmap(struct mshv_mem_region *region,
294+
u32 flags,
295+
u64 page_offset, u64 page_count)
296+
{
297+
struct page *page = region->pages[page_offset];
298+
299+
if (PageHuge(page) || PageTransCompound(page))
300+
flags |= HV_UNMAP_GPA_LARGE_PAGE;
301+
302+
return hv_call_unmap_gpa_pages(region->partition->pt_id,
303+
region->start_gfn + page_offset,
304+
page_count, flags);
305+
}
306+
307+
static int mshv_region_unmap(struct mshv_mem_region *region)
308+
{
309+
return mshv_region_process_range(region, 0,
310+
0, region->nr_pages,
311+
mshv_region_chunk_unmap);
312+
}
313+
147314
void mshv_region_destroy(struct mshv_mem_region *region)
148315
{
149316
struct mshv_partition *partition = region->partition;
150-
u32 unmap_flags = 0;
151317
int ret;
152318

153319
hlist_del(&region->hnode);
@@ -162,12 +328,7 @@ void mshv_region_destroy(struct mshv_mem_region *region)
162328
}
163329
}
164330

165-
if (region->flags.large_pages)
166-
unmap_flags |= HV_UNMAP_GPA_LARGE_PAGE;
167-
168-
/* ignore unmap failures and continue as process may be exiting */
169-
hv_call_unmap_gpa_pages(partition->pt_id, region->start_gfn,
170-
region->nr_pages, unmap_flags);
331+
mshv_region_unmap(region);
171332

172333
mshv_region_invalidate(region);
173334

drivers/hv/mshv_root.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,8 @@ struct mshv_mem_region {
7777
u64 start_uaddr;
7878
u32 hv_map_flags;
7979
struct {
80-
u64 large_pages: 1; /* 2MiB */
8180
u64 range_pinned: 1;
82-
u64 reserved: 62;
81+
u64 reserved: 63;
8382
} flags;
8483
struct mshv_partition *partition;
8584
struct page *pages[];

0 commit comments

Comments
 (0)