|
7 | 7 | #include <linux/cpufeature.h> |
8 | 8 | #include <linux/export.h> |
9 | 9 | #include <linux/io.h> |
| 10 | +#include <linux/kexec.h> |
10 | 11 | #include <asm/coco.h> |
11 | 12 | #include <asm/tdx.h> |
12 | 13 | #include <asm/vmx.h> |
13 | 14 | #include <asm/ia32.h> |
14 | 15 | #include <asm/insn.h> |
15 | 16 | #include <asm/insn-eval.h> |
16 | 17 | #include <asm/pgtable.h> |
| 18 | +#include <asm/set_memory.h> |
17 | 19 |
|
18 | 20 | /* MMIO direction */ |
19 | 21 | #define EPT_READ 0 |
@@ -831,6 +833,95 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, |
831 | 833 | return 0; |
832 | 834 | } |
833 | 835 |
|
| 836 | +/* Stop new private<->shared conversions */ |
| 837 | +static void tdx_kexec_begin(void) |
| 838 | +{ |
| 839 | + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) |
| 840 | + return; |
| 841 | + |
| 842 | + /* |
| 843 | + * Crash kernel reaches here with interrupts disabled: can't wait for |
| 844 | + * conversions to finish. |
| 845 | + * |
| 846 | + * If race happened, just report and proceed. |
| 847 | + */ |
| 848 | + if (!set_memory_enc_stop_conversion()) |
| 849 | + pr_warn("Failed to stop shared<->private conversions\n"); |
| 850 | +} |
| 851 | + |
| 852 | +/* Walk direct mapping and convert all shared memory back to private */ |
| 853 | +static void tdx_kexec_finish(void) |
| 854 | +{ |
| 855 | + unsigned long addr, end; |
| 856 | + long found = 0, shared; |
| 857 | + |
| 858 | + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) |
| 859 | + return; |
| 860 | + |
| 861 | + lockdep_assert_irqs_disabled(); |
| 862 | + |
| 863 | + addr = PAGE_OFFSET; |
| 864 | + end = PAGE_OFFSET + get_max_mapped(); |
| 865 | + |
| 866 | + while (addr < end) { |
| 867 | + unsigned long size; |
| 868 | + unsigned int level; |
| 869 | + pte_t *pte; |
| 870 | + |
| 871 | + pte = lookup_address(addr, &level); |
| 872 | + size = page_level_size(level); |
| 873 | + |
| 874 | + if (pte && pte_decrypted(*pte)) { |
| 875 | + int pages = size / PAGE_SIZE; |
| 876 | + |
| 877 | + /* |
| 878 | + * Touching memory with shared bit set triggers implicit |
| 879 | + * conversion to shared. |
| 880 | + * |
| 881 | + * Make sure nobody touches the shared range from |
| 882 | + * now on. |
| 883 | + */ |
| 884 | + set_pte(pte, __pte(0)); |
| 885 | + |
| 886 | + /* |
| 887 | + * Memory encryption state persists across kexec. |
| 888 | + * If tdx_enc_status_changed() fails in the first |
| 889 | + * kernel, it leaves memory in an unknown state. |
| 890 | + * |
| 891 | + * If that memory remains shared, accessing it in the |
| 892 | + * *next* kernel through a private mapping will result |
| 893 | + * in an unrecoverable guest shutdown. |
| 894 | + * |
| 895 | + * The kdump kernel boot is not impacted as it uses |
| 896 | + * a pre-reserved memory range that is always private. |
| 897 | + * However, gathering crash information could lead to |
| 898 | + * a crash if it accesses unconverted memory through |
| 899 | + * a private mapping which is possible when accessing |
| 900 | + * that memory through /proc/vmcore, for example. |
| 901 | + * |
| 902 | + * In all cases, print error info in order to leave |
| 903 | + * enough bread crumbs for debugging. |
| 904 | + */ |
| 905 | + if (!tdx_enc_status_changed(addr, pages, true)) { |
| 906 | + pr_err("Failed to unshare range %#lx-%#lx\n", |
| 907 | + addr, addr + size); |
| 908 | + } |
| 909 | + |
| 910 | + found += pages; |
| 911 | + } |
| 912 | + |
| 913 | + addr += size; |
| 914 | + } |
| 915 | + |
| 916 | + __flush_tlb_all(); |
| 917 | + |
| 918 | + shared = atomic_long_read(&nr_shared); |
| 919 | + if (shared != found) { |
| 920 | + pr_err("shared page accounting is off\n"); |
| 921 | + pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); |
| 922 | + } |
| 923 | +} |
| 924 | + |
834 | 925 | void __init tdx_early_init(void) |
835 | 926 | { |
836 | 927 | struct tdx_module_args args = { |
@@ -890,6 +981,9 @@ void __init tdx_early_init(void) |
890 | 981 | x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; |
891 | 982 | x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; |
892 | 983 |
|
| 984 | + x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; |
| 985 | + x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; |
| 986 | + |
893 | 987 | /* |
894 | 988 | * TDX intercepts the RDMSR to read the X2APIC ID in the parallel |
895 | 989 | * bringup low level code. That raises #VE which cannot be handled |
|
0 commit comments