|
7 | 7 | #include <linux/cpufeature.h> |
8 | 8 | #include <linux/export.h> |
9 | 9 | #include <linux/io.h> |
| 10 | +#include <linux/kexec.h> |
10 | 11 | #include <asm/coco.h> |
11 | 12 | #include <asm/tdx.h> |
12 | 13 | #include <asm/vmx.h> |
13 | 14 | #include <asm/ia32.h> |
14 | 15 | #include <asm/insn.h> |
15 | 16 | #include <asm/insn-eval.h> |
16 | 17 | #include <asm/pgtable.h> |
| 18 | +#include <asm/set_memory.h> |
17 | 19 |
|
18 | 20 | /* MMIO direction */ |
19 | 21 | #define EPT_READ 0 |
|
38 | 40 |
|
39 | 41 | #define TDREPORT_SUBTYPE_0 0 |
40 | 42 |
|
| 43 | +static atomic_long_t nr_shared; /* Running count of pages accounted as shared: adjusted in tdx_enc_status_change_finish(), compared against the page-table walk in tdx_kexec_finish(). */ |
| 44 | + |
41 | 45 | /* Called from __tdx_hypercall() for unrecoverable failure */ |
42 | 46 | noinstr void __noreturn __tdx_hypercall_failed(void) |
43 | 47 | { |
@@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) |
798 | 802 | return true; |
799 | 803 | } |
800 | 804 |
|
801 | | -static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, |
802 | | - bool enc) |
| 805 | +static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, |
| 806 | + bool enc) |
803 | 807 | { |
804 | 808 | /* |
805 | 809 | * Only handle shared->private conversion here. |
806 | 810 | * See the comment in tdx_early_init(). |
| | + * |
| | + * The new version returns an errno-style int instead of bool: |
| | + * 0 when enc is false (nothing to do at this stage) or when |
| | + * tdx_enc_status_changed() succeeds, -EIO when it reports failure. |
807 | 811 | */ |
808 | | - if (enc) |
809 | | - return tdx_enc_status_changed(vaddr, numpages, enc); |
810 | | - return true; |
| 812 | + if (enc && !tdx_enc_status_changed(vaddr, numpages, enc)) |
| 813 | + return -EIO; |
| 814 | + |
| 815 | + return 0; |
811 | 816 | } |
812 | 817 |
|
813 | | -static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, |
| 818 | +static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, |
814 | 819 | bool enc) |
815 | 820 | { |
816 | 821 | /* |
817 | 822 | * Only handle private->shared conversion here. |
818 | 823 | * See the comment in tdx_early_init(). |
| | + * |
| | + * The new version returns 0 on success and -EIO when |
| | + * tdx_enc_status_changed() fails for a !enc request; on that failure |
| | + * path nr_shared is left untouched. |
| | + * |
| | + * On success nr_shared is adjusted by numpages in both directions: |
| | + * decreased when enc is set (no conversion call is made here for |
| | + * that case), increased otherwise. |
819 | 824 | */ |
820 | | - if (!enc) |
821 | | - return tdx_enc_status_changed(vaddr, numpages, enc); |
822 | | - return true; |
| 825 | + if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc)) |
| 826 | + return -EIO; |
| 827 | + |
| 828 | + if (enc) |
| 829 | + atomic_long_sub(numpages, &nr_shared); |
| 830 | + else |
| 831 | + atomic_long_add(numpages, &nr_shared); |
| 832 | + |
| 833 | + return 0; |
| 834 | +} |
| 835 | + |
| 836 | +/* |
| | + * Stop new private<->shared conversions. |
| | + * |
| | + * Registered as x86_platform.guest.enc_kexec_begin in tdx_early_init(). |
| | + * Does nothing when CONFIG_KEXEC_CORE is disabled. A failure of |
| | + * set_memory_enc_stop_conversion() is only logged with pr_warn(); the |
| | + * function returns normally either way. |
| | + */ |
| 837 | +static void tdx_kexec_begin(void) |
| 838 | +{ |
| 839 | + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) |
| 840 | + return; |
| 841 | + |
| 842 | + /* |
| 843 | + * Crash kernel reaches here with interrupts disabled: can't wait for |
| 844 | + * conversions to finish. |
| 845 | + * |
| 846 | + * If race happened, just report and proceed. |
| 847 | + */ |
| 848 | + if (!set_memory_enc_stop_conversion()) |
| 849 | + pr_warn("Failed to stop shared<->private conversions\n"); |
| 850 | +} |
| 851 | + |
| 852 | +/* |
| | + * Walk direct mapping and convert all shared memory back to private. |
| | + * |
| | + * Registered as x86_platform.guest.enc_kexec_finish in tdx_early_init(). |
| | + * Does nothing when CONFIG_KEXEC_CORE is disabled; otherwise asserts via |
| | + * lockdep that interrupts are disabled. |
| | + * |
| | + * The walk covers [PAGE_OFFSET, PAGE_OFFSET + get_max_mapped()) and |
| | + * advances by the size of the mapping level reported by lookup_address(). |
| | + * For every entry that pte_decrypted() flags, the PTE is cleared first and |
| | + * the range is then handed to tdx_enc_status_changed(addr, pages, true). |
| | + * A failed conversion is logged and the walk continues. After a full TLB |
| | + * flush, the number of pages found is compared with nr_shared and any |
| | + * mismatch is reported with pr_err(). |
| | + * |
| | + * NOTE(review): 'size' is computed from 'level' before 'pte' is checked |
| | + * for NULL, so the loop step relies on lookup_address() filling in a |
| | + * usable level even when it returns NULL -- confirm against its |
| | + * implementation. |
| | + */ |
| 853 | +static void tdx_kexec_finish(void) |
| 854 | +{ |
| 855 | + unsigned long addr, end; |
| 856 | + long found = 0, shared; |
| 857 | + |
| 858 | + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) |
| 859 | + return; |
| 860 | + |
| 861 | + lockdep_assert_irqs_disabled(); |
| 862 | + |
| 863 | + addr = PAGE_OFFSET; |
| 864 | + end = PAGE_OFFSET + get_max_mapped(); |
| 865 | + |
| 866 | + while (addr < end) { |
| 867 | + unsigned long size; |
| 868 | + unsigned int level; |
| 869 | + pte_t *pte; |
| 870 | + |
| 871 | + pte = lookup_address(addr, &level); |
| 872 | + size = page_level_size(level); |
| 873 | + |
| 874 | + if (pte && pte_decrypted(*pte)) { |
| 875 | + int pages = size / PAGE_SIZE; |
| 876 | + |
| 877 | + /* |
| 878 | + * Touching memory with shared bit set triggers implicit |
| 879 | + * conversion to shared. |
| 880 | + * |
| 881 | + * Make sure nobody touches the shared range from |
| 882 | + * now on. |
| 883 | + */ |
| 884 | + set_pte(pte, __pte(0)); |
| 885 | + |
| 886 | + /* |
| 887 | + * Memory encryption state persists across kexec. |
| 888 | + * If tdx_enc_status_changed() fails in the first |
| 889 | + * kernel, it leaves memory in an unknown state. |
| 890 | + * |
| 891 | + * If that memory remains shared, accessing it in the |
| 892 | + * *next* kernel through a private mapping will result |
| 893 | + * in an unrecoverable guest shutdown. |
| 894 | + * |
| 895 | + * The kdump kernel boot is not impacted as it uses |
| 896 | + * a pre-reserved memory range that is always private. |
| 897 | + * However, gathering crash information could lead to |
| 898 | + * a crash if it accesses unconverted memory through |
| 899 | + * a private mapping which is possible when accessing |
| 900 | + * that memory through /proc/vmcore, for example. |
| 901 | + * |
| 902 | + * In all cases, print error info in order to leave |
| 903 | + * enough bread crumbs for debugging. |
| 904 | + */ |
| 905 | + if (!tdx_enc_status_changed(addr, pages, true)) { |
| 906 | + pr_err("Failed to unshare range %#lx-%#lx\n", |
| 907 | + addr, addr + size); |
| 908 | + } |
| 909 | + |
| 910 | + found += pages; |
| 911 | + } |
| 912 | + |
| 913 | + addr += size; |
| 914 | + } |
| 915 | + |
| 916 | + __flush_tlb_all(); |
| 917 | + |
| 918 | + shared = atomic_long_read(&nr_shared); |
| 919 | + if (shared != found) { |
| 920 | + pr_err("shared page accounting is off\n"); |
| 921 | + pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); |
| 922 | + } |
823 | 923 | } |
824 | 924 |
|
825 | 925 | void __init tdx_early_init(void) |
@@ -881,6 +981,9 @@ void __init tdx_early_init(void) |
881 | 981 | x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; |
882 | 982 | x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; |
883 | 983 |
|
| 984 | + x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; |
| 985 | + x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; |
| 986 | + |
884 | 987 | /* |
885 | 988 | * TDX intercepts the RDMSR to read the X2APIC ID in the parallel |
886 | 989 | * bringup low level code. That raises #VE which cannot be handled |
|
0 commit comments