Skip to content

Commit 1c46d04

Browse files
committed
Merge tag 'hyperv-fixes-signed-20240303' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv fixes from Wei Liu: - Multiple fixes, cleanups and documentations for Hyper-V core code and drivers * tag 'hyperv-fixes-signed-20240303' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: Drivers: hv: vmbus: make hv_bus const x86/hyperv: Allow 15-bit APIC IDs for VTL platforms x86/hyperv: Make encrypted/decrypted changes safe for load_unaligned_zeropad() x86/mm: Regularize set_memory_p() parameters and make non-static x86/hyperv: Use slow_virt_to_phys() in page transition hypervisor callback Documentation: hyperv: Add overview of PCI pass-thru device support Drivers: hv: vmbus: Update indentation in create_gpadl_header() Drivers: hv: vmbus: Remove duplication and cleanup code in create_gpadl_header() fbdev/hyperv_fb: Fix logic error for Gen2 VMs in hvfb_getmem() Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory hv_utils: Allow implicit ICTIMESYNCFLAG_SYNC
2 parents 90d35da + aa707b6 commit 1c46d04

11 files changed

Lines changed: 521 additions & 126 deletions

File tree

Documentation/virt/hyperv/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ Hyper-V Enlightenments
1010
overview
1111
vmbus
1212
clocks
13+
vpci

Documentation/virt/hyperv/vpci.rst

Lines changed: 316 additions & 0 deletions
Large diffs are not rendered by default.

arch/x86/hyperv/hv_vtl.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
extern struct boot_params boot_params;
1717
static struct real_mode_header hv_vtl_real_mode_header;
1818

19+
/*
 * Report support for 15-bit extended MSI APIC destination IDs on VTL
 * platforms; unconditionally true here, and installed as the
 * x86_init.hyper.msi_ext_dest_id callback in hv_vtl_init_platform().
 */
static bool __init hv_vtl_msi_ext_dest_id(void)
20+
{
21+
return true;
22+
}
23+
1924
void __init hv_vtl_init_platform(void)
2025
{
2126
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -38,6 +43,8 @@ void __init hv_vtl_init_platform(void)
3843
x86_platform.legacy.warm_reset = 0;
3944
x86_platform.legacy.reserve_bios_regions = 0;
4045
x86_platform.legacy.devices.pnpbios = 0;
46+
47+
x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id;
4148
}
4249

4350
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)

arch/x86/hyperv/ivm.c

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm/io.h>
1616
#include <asm/coco.h>
1717
#include <asm/mem_encrypt.h>
18+
#include <asm/set_memory.h>
1819
#include <asm/mshyperv.h>
1920
#include <asm/hypervisor.h>
2021
#include <asm/mtrr.h>
@@ -502,6 +503,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
502503
return -EFAULT;
503504
}
504505

506+
/*
507+
* When transitioning memory between encrypted and decrypted, the caller
508+
* of set_memory_encrypted() or set_memory_decrypted() is responsible for
509+
* ensuring that the memory isn't in use and isn't referenced while the
510+
* transition is in progress. The transition has multiple steps, and the
511+
* memory is in an inconsistent state until all steps are complete. A
512+
* reference while the state is inconsistent could result in an exception
513+
* that can't be cleanly fixed up.
514+
*
515+
* But the Linux kernel load_unaligned_zeropad() mechanism could cause a
516+
* stray reference that can't be prevented by the caller, so Linux has
517+
* specific code to handle this case. But when the #VC and #VE exceptions are
518+
* routed to a paravisor, the specific code doesn't work. To avoid this
519+
* problem, mark the pages as "not present" while the transition is in
520+
* progress. If load_unaligned_zeropad() causes a stray reference, a normal
521+
* page fault is generated instead of #VC or #VE, and the page-fault-based
522+
* handlers for load_unaligned_zeropad() resolve the reference. When the
523+
* transition is complete, hv_vtom_set_host_visibility() marks the pages
524+
* as "present" again.
525+
*/
526+
/*
 * Clear the PRESENT bit on the page range so a stray reference during an
 * encrypted<->decrypted transition faults as a normal page fault (see the
 * comment block above). Returns true on success; 'enc' is unused here.
 */
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
527+
{
528+
return !set_memory_np(kbuffer, pagecount);
529+
}
530+
505531
/*
506532
* hv_vtom_set_host_visibility - Set specified memory visible to host.
507533
*
@@ -515,16 +541,28 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
515541
enum hv_mem_host_visibility visibility = enc ?
516542
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
517543
u64 *pfn_array;
544+
phys_addr_t paddr;
545+
void *vaddr;
518546
int ret = 0;
519547
bool result = true;
520548
int i, pfn;
521549

522550
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
523-
if (!pfn_array)
524-
return false;
551+
if (!pfn_array) {
552+
result = false;
553+
goto err_set_memory_p;
554+
}
525555

526556
for (i = 0, pfn = 0; i < pagecount; i++) {
527-
pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE);
557+
/*
558+
* Use slow_virt_to_phys() because the PRESENT bit has been
559+
* temporarily cleared in the PTEs. slow_virt_to_phys() works
560+
* without the PRESENT bit while virt_to_hvpfn() or similar
561+
* does not.
562+
*/
563+
vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
564+
paddr = slow_virt_to_phys(vaddr);
565+
pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
528566
pfn++;
529567

530568
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
@@ -538,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
538576
}
539577
}
540578

541-
err_free_pfn_array:
579+
err_free_pfn_array:
542580
kfree(pfn_array);
581+
582+
err_set_memory_p:
583+
/*
584+
* Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
585+
* did. Do this even if there is an error earlier in this function in
586+
* order to avoid leaving the memory range in a "broken" state. Setting
587+
* the PRESENT bits shouldn't fail, but return an error if it does.
588+
*/
589+
if (set_memory_p(kbuffer, pagecount))
590+
result = false;
591+
543592
return result;
544593
}
545594

546595
static bool hv_vtom_tlb_flush_required(bool private)
547596
{
548-
return true;
597+
/*
598+
* Since hv_vtom_clear_present() marks the PTEs as "not present"
599+
* and flushes the TLB, they can't be in the TLB. That makes the
600+
* flush controlled by this function redundant, so return "false".
601+
*/
602+
return false;
549603
}
550604

551605
static bool hv_vtom_cache_flush_required(void)
@@ -608,6 +662,7 @@ void __init hv_vtom_init(void)
608662
x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
609663
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
610664
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
665+
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
611666
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
612667

613668
/* Set WB as the default cache mode. */

arch/x86/include/asm/set_memory.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages);
4747
int set_memory_wc(unsigned long addr, int numpages);
4848
int set_memory_wb(unsigned long addr, int numpages);
4949
int set_memory_np(unsigned long addr, int numpages);
50+
int set_memory_p(unsigned long addr, int numpages);
5051
int set_memory_4k(unsigned long addr, int numpages);
5152
int set_memory_encrypted(unsigned long addr, int numpages);
5253
int set_memory_decrypted(unsigned long addr, int numpages);

arch/x86/mm/pat/set_memory.c

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
755755
* areas on 32-bit NUMA systems. The percpu areas can
756756
* end up in this kind of memory, for instance.
757757
*
758-
* This could be optimized, but it is only intended to be
759-
* used at initialization time, and keeping it
760-
* unoptimized should increase the testing coverage for
761-
* the more obscure platforms.
758+
* Note that as long as the PTEs are well-formed with correct PFNs, this
759+
* works without checking the PRESENT bit in the leaf PTE. This is unlike
760+
* the similar vmalloc_to_page() and derivatives. Callers may depend on
761+
* this behavior.
762+
*
763+
* This could be optimized, but it is only used in paths that are not perf
764+
* sensitive, and keeping it unoptimized should increase the testing coverage
765+
* for the more obscure platforms.
762766
*/
763767
phys_addr_t slow_virt_to_phys(void *__virt_addr)
764768
{
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
20412045
return rc;
20422046
}
20432047

2044-
static int set_memory_p(unsigned long *addr, int numpages)
2045-
{
2046-
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
2047-
}
2048-
20492048
/* Restore full speculative operation to the pfn. */
20502049
int clear_mce_nospec(unsigned long pfn)
20512050
{
20522051
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
20532052

2054-
return set_memory_p(&addr, 1);
2053+
return set_memory_p(addr, 1);
20552054
}
20562055
EXPORT_SYMBOL_GPL(clear_mce_nospec);
20572056
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
21042103
CPA_NO_CHECK_ALIAS, NULL);
21052104
}
21062105

2106+
/* Set the PRESENT bit on @numpages pages starting at @addr; 0 on success. */
int set_memory_p(unsigned long addr, int numpages)
2107+
{
2108+
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
2109+
}
2110+
21072111
int set_memory_4k(unsigned long addr, int numpages)
21082112
{
21092113
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),

drivers/hv/channel.c

Lines changed: 70 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -322,125 +322,89 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
322322

323323
pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT;
324324

325-
/* do we need a gpadl body msg */
326325
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
327326
sizeof(struct vmbus_channel_gpadl_header) -
328327
sizeof(struct gpa_range);
328+
pfncount = umin(pagecount, pfnsize / sizeof(u64));
329+
330+
msgsize = sizeof(struct vmbus_channel_msginfo) +
331+
sizeof(struct vmbus_channel_gpadl_header) +
332+
sizeof(struct gpa_range) + pfncount * sizeof(u64);
333+
msgheader = kzalloc(msgsize, GFP_KERNEL);
334+
if (!msgheader)
335+
return -ENOMEM;
336+
337+
INIT_LIST_HEAD(&msgheader->submsglist);
338+
msgheader->msgsize = msgsize;
339+
340+
gpadl_header = (struct vmbus_channel_gpadl_header *)
341+
msgheader->msg;
342+
gpadl_header->rangecount = 1;
343+
gpadl_header->range_buflen = sizeof(struct gpa_range) +
344+
pagecount * sizeof(u64);
345+
gpadl_header->range[0].byte_offset = 0;
346+
gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
347+
for (i = 0; i < pfncount; i++)
348+
gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
349+
type, kbuffer, size, send_offset, i);
350+
*msginfo = msgheader;
351+
352+
pfnsum = pfncount;
353+
pfnleft = pagecount - pfncount;
354+
355+
/* how many pfns can we fit in a body message */
356+
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
357+
sizeof(struct vmbus_channel_gpadl_body);
329358
pfncount = pfnsize / sizeof(u64);
330359

331-
if (pagecount > pfncount) {
332-
/* we need a gpadl body */
333-
/* fill in the header */
360+
/*
361+
* If pfnleft is zero, everything fits in the header and no body
362+
* messages are needed
363+
*/
364+
while (pfnleft) {
365+
pfncurr = umin(pfncount, pfnleft);
334366
msgsize = sizeof(struct vmbus_channel_msginfo) +
335-
sizeof(struct vmbus_channel_gpadl_header) +
336-
sizeof(struct gpa_range) + pfncount * sizeof(u64);
337-
msgheader = kzalloc(msgsize, GFP_KERNEL);
338-
if (!msgheader)
339-
goto nomem;
340-
341-
INIT_LIST_HEAD(&msgheader->submsglist);
342-
msgheader->msgsize = msgsize;
343-
344-
gpadl_header = (struct vmbus_channel_gpadl_header *)
345-
msgheader->msg;
346-
gpadl_header->rangecount = 1;
347-
gpadl_header->range_buflen = sizeof(struct gpa_range) +
348-
pagecount * sizeof(u64);
349-
gpadl_header->range[0].byte_offset = 0;
350-
gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
351-
for (i = 0; i < pfncount; i++)
352-
gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
353-
type, kbuffer, size, send_offset, i);
354-
*msginfo = msgheader;
355-
356-
pfnsum = pfncount;
357-
pfnleft = pagecount - pfncount;
358-
359-
/* how many pfns can we fit */
360-
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
361-
sizeof(struct vmbus_channel_gpadl_body);
362-
pfncount = pfnsize / sizeof(u64);
363-
364-
/* fill in the body */
365-
while (pfnleft) {
366-
if (pfnleft > pfncount)
367-
pfncurr = pfncount;
368-
else
369-
pfncurr = pfnleft;
370-
371-
msgsize = sizeof(struct vmbus_channel_msginfo) +
372-
sizeof(struct vmbus_channel_gpadl_body) +
373-
pfncurr * sizeof(u64);
374-
msgbody = kzalloc(msgsize, GFP_KERNEL);
375-
376-
if (!msgbody) {
377-
struct vmbus_channel_msginfo *pos = NULL;
378-
struct vmbus_channel_msginfo *tmp = NULL;
379-
/*
380-
* Free up all the allocated messages.
381-
*/
382-
list_for_each_entry_safe(pos, tmp,
383-
&msgheader->submsglist,
384-
msglistentry) {
385-
386-
list_del(&pos->msglistentry);
387-
kfree(pos);
388-
}
389-
390-
goto nomem;
391-
}
392-
393-
msgbody->msgsize = msgsize;
394-
gpadl_body =
395-
(struct vmbus_channel_gpadl_body *)msgbody->msg;
367+
sizeof(struct vmbus_channel_gpadl_body) +
368+
pfncurr * sizeof(u64);
369+
msgbody = kzalloc(msgsize, GFP_KERNEL);
396370

371+
if (!msgbody) {
372+
struct vmbus_channel_msginfo *pos = NULL;
373+
struct vmbus_channel_msginfo *tmp = NULL;
397374
/*
398-
* Gpadl is u32 and we are using a pointer which could
399-
* be 64-bit
400-
* This is governed by the guest/host protocol and
401-
* so the hypervisor guarantees that this is ok.
375+
* Free up all the allocated messages.
402376
*/
403-
for (i = 0; i < pfncurr; i++)
404-
gpadl_body->pfn[i] = hv_gpadl_hvpfn(type,
405-
kbuffer, size, send_offset, pfnsum + i);
406-
407-
/* add to msg header */
408-
list_add_tail(&msgbody->msglistentry,
409-
&msgheader->submsglist);
410-
pfnsum += pfncurr;
411-
pfnleft -= pfncurr;
377+
list_for_each_entry_safe(pos, tmp,
378+
&msgheader->submsglist,
379+
msglistentry) {
380+
381+
list_del(&pos->msglistentry);
382+
kfree(pos);
383+
}
384+
kfree(msgheader);
385+
return -ENOMEM;
412386
}
413-
} else {
414-
/* everything fits in a header */
415-
msgsize = sizeof(struct vmbus_channel_msginfo) +
416-
sizeof(struct vmbus_channel_gpadl_header) +
417-
sizeof(struct gpa_range) + pagecount * sizeof(u64);
418-
msgheader = kzalloc(msgsize, GFP_KERNEL);
419-
if (msgheader == NULL)
420-
goto nomem;
421-
422-
INIT_LIST_HEAD(&msgheader->submsglist);
423-
msgheader->msgsize = msgsize;
424-
425-
gpadl_header = (struct vmbus_channel_gpadl_header *)
426-
msgheader->msg;
427-
gpadl_header->rangecount = 1;
428-
gpadl_header->range_buflen = sizeof(struct gpa_range) +
429-
pagecount * sizeof(u64);
430-
gpadl_header->range[0].byte_offset = 0;
431-
gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
432-
for (i = 0; i < pagecount; i++)
433-
gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
434-
type, kbuffer, size, send_offset, i);
435-
436-
*msginfo = msgheader;
387+
388+
msgbody->msgsize = msgsize;
389+
gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg;
390+
391+
/*
392+
* Gpadl is u32 and we are using a pointer which could
393+
* be 64-bit
394+
* This is governed by the guest/host protocol and
395+
* so the hypervisor guarantees that this is ok.
396+
*/
397+
for (i = 0; i < pfncurr; i++)
398+
gpadl_body->pfn[i] = hv_gpadl_hvpfn(type,
399+
kbuffer, size, send_offset, pfnsum + i);
400+
401+
/* add to msg header */
402+
list_add_tail(&msgbody->msglistentry, &msgheader->submsglist);
403+
pfnsum += pfncurr;
404+
pfnleft -= pfncurr;
437405
}
438406

439407
return 0;
440-
nomem:
441-
kfree(msgheader);
442-
kfree(msgbody);
443-
return -ENOMEM;
444408
}
445409

446410
/*

0 commit comments

Comments
 (0)